/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988-2014 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "stringpool.h"
#include "attribs.h"
#include "calls.h"
#include "stor-layout.h"
#include "varasm.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "diagnostic-core.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"
#include "target.h"
#include "target-def.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "reload.h"
#include "cgraph.h"
#include "pointer-set.h"
#include "hash-table.h"
#include "vec.h"
#include "basic-block.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "cfgloop.h"
#include "dwarf2.h"
#include "df.h"
#include "tm-constrs.h"
#include "params.h"
#include "cselib.h"
#include "debug.h"
#include "sched-int.h"
#include "sbitmap.h"
#include "fibheap.h"
#include "opts.h"
#include "diagnostic.h"
#include "dumpfile.h"
#include "tree-pass.h"
#include "wide-int.h"
#include "context.h"
#include "pass_manager.h"
#include "target-globals.h"
#include "tree-vectorizer.h"
#include "shrink-wrap.h"

static rtx legitimize_dllimport_symbol (rtx, bool);
static rtx legitimize_pe_coff_extern_decl (rtx, bool);
static rtx legitimize_pe_coff_symbol (rtx, bool);

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
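
/* Added illustrative note (not part of the original source): MODE_INDEX is
   used to index the per-mode arrays in the cost tables below, e.g. roughly
   cost->mult_init[MODE_INDEX (SImode)] or cost->divide[MODE_INDEX (mode)];
   index 4 ("other") catches any remaining mode such as TImode.  The exact
   field names are assumptions here; see struct processor_costs in i386.h
   for the authoritative layout.  */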

/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)
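
/* Added note for illustration: under the assumption above, an add costs
   COSTS_N_BYTES (2) == 4, the same numeric value as COSTS_N_INSNS (1), so
   the size-tuned costs stay on a scale comparable with the speed-tuned
   tables.  */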

#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
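
/* Added illustrative note (not part of the original source): each
   *_memcpy[2] / *_memset[2] table below holds one stringop_algs entry for
   32-bit code and one for 64-bit code.  Each {max, alg, noalign} triple
   selects algorithm ALG for blocks of at most MAX bytes, with -1 meaning
   "no upper limit"; the leading algorithm is used when the block size is
   not known at compile time.  DUMMY_STRINGOP_ALGS marks a variant
   (typically the 64-bit one on 32-bit-only CPUs) that is never consulted.
   See struct stringop_algs in i386.h for the authoritative layout.  */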

static stringop_algs ix86_size_memcpy[2] = {
  {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
  {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
static stringop_algs ix86_size_memset[2] = {
  {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
  {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};

const
struct processor_costs ix86_size_cost = {/* costs for tuning for size */
  COSTS_N_BYTES (2),              /* cost of an add instruction */
  COSTS_N_BYTES (3),              /* cost of a lea instruction */
  COSTS_N_BYTES (2),              /* variable shift costs */
  COSTS_N_BYTES (3),              /* constant shift costs */
  {COSTS_N_BYTES (3),             /* cost of starting multiply for QI */
   COSTS_N_BYTES (3),             /* HI */
   COSTS_N_BYTES (3),             /* SI */
   COSTS_N_BYTES (3),             /* DI */
   COSTS_N_BYTES (5)},            /* other */
  0,                              /* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),             /* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),             /* HI */
   COSTS_N_BYTES (3),             /* SI */
   COSTS_N_BYTES (3),             /* DI */
   COSTS_N_BYTES (5)},            /* other */
  COSTS_N_BYTES (3),              /* cost of movsx */
  COSTS_N_BYTES (3),              /* cost of movzx */
  0,                              /* "large" insn */
  2,                              /* MOVE_RATIO */
  2,                              /* cost for loading QImode using movzbl */
  {2, 2, 2},                      /* cost of loading integer registers
                                     in QImode, HImode and SImode.
                                     Relative to reg-reg move (2).  */
  {2, 2, 2},                      /* cost of storing integer registers */
  2,                              /* cost of reg,reg fld/fst */
  {2, 2, 2},                      /* cost of loading fp registers
                                     in SFmode, DFmode and XFmode */
  {2, 2, 2},                      /* cost of storing fp registers
                                     in SFmode, DFmode and XFmode */
  3,                              /* cost of moving MMX register */
  {3, 3},                         /* cost of loading MMX registers
                                     in SImode and DImode */
  {3, 3},                         /* cost of storing MMX registers
                                     in SImode and DImode */
  3,                              /* cost of moving SSE register */
  {3, 3, 3},                      /* cost of loading SSE registers
                                     in SImode, DImode and TImode */
  {3, 3, 3},                      /* cost of storing SSE registers
                                     in SImode, DImode and TImode */
  3,                              /* MMX or SSE register to integer */
  0,                              /* size of l1 cache */
  0,                              /* size of l2 cache */
  0,                              /* size of prefetch block */
  0,                              /* number of parallel prefetches */
  2,                              /* Branch cost */
  COSTS_N_BYTES (2),              /* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2),              /* cost of FMUL instruction.  */
  COSTS_N_BYTES (2),              /* cost of FDIV instruction.  */
  COSTS_N_BYTES (2),              /* cost of FABS instruction.  */
  COSTS_N_BYTES (2),              /* cost of FCHS instruction.  */
  COSTS_N_BYTES (2),              /* cost of FSQRT instruction.  */
  ix86_size_memcpy,
  ix86_size_memset,
  1,                              /* scalar_stmt_cost.  */
  1,                              /* scalar load_cost.  */
  1,                              /* scalar_store_cost.  */
  1,                              /* vec_stmt_cost.  */
  1,                              /* vec_to_scalar_cost.  */
  1,                              /* scalar_to_vec_cost.  */
  1,                              /* vec_align_load_cost.  */
  1,                              /* vec_unalign_load_cost.  */
  1,                              /* vec_store_cost.  */
  1,                              /* cond_taken_branch_cost.  */
  1,                              /* cond_not_taken_branch_cost.  */
};

/* Processor costs (relative to an add) */
static stringop_algs i386_memcpy[2] = {
  {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs i386_memset[2] = {
  {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
  DUMMY_STRINGOP_ALGS};

static const
struct processor_costs i386_cost = {    /* 386 specific costs */
  COSTS_N_INSNS (1),              /* cost of an add instruction */
  COSTS_N_INSNS (1),              /* cost of a lea instruction */
  COSTS_N_INSNS (3),              /* variable shift costs */
  COSTS_N_INSNS (2),              /* constant shift costs */
  {COSTS_N_INSNS (6),             /* cost of starting multiply for QI */
   COSTS_N_INSNS (6),             /* HI */
   COSTS_N_INSNS (6),             /* SI */
   COSTS_N_INSNS (6),             /* DI */
   COSTS_N_INSNS (6)},            /* other */
  COSTS_N_INSNS (1),              /* cost of multiply per each bit set */
  {COSTS_N_INSNS (23),            /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),            /* HI */
   COSTS_N_INSNS (23),            /* SI */
   COSTS_N_INSNS (23),            /* DI */
   COSTS_N_INSNS (23)},           /* other */
  COSTS_N_INSNS (3),              /* cost of movsx */
  COSTS_N_INSNS (2),              /* cost of movzx */
  15,                             /* "large" insn */
  3,                              /* MOVE_RATIO */
  4,                              /* cost for loading QImode using movzbl */
  {2, 4, 2},                      /* cost of loading integer registers
                                     in QImode, HImode and SImode.
                                     Relative to reg-reg move (2).  */
  {2, 4, 2},                      /* cost of storing integer registers */
  2,                              /* cost of reg,reg fld/fst */
  {8, 8, 8},                      /* cost of loading fp registers
                                     in SFmode, DFmode and XFmode */
  {8, 8, 8},                      /* cost of storing fp registers
                                     in SFmode, DFmode and XFmode */
  2,                              /* cost of moving MMX register */
  {4, 8},                         /* cost of loading MMX registers
                                     in SImode and DImode */
  {4, 8},                         /* cost of storing MMX registers
                                     in SImode and DImode */
  2,                              /* cost of moving SSE register */
  {4, 8, 16},                     /* cost of loading SSE registers
                                     in SImode, DImode and TImode */
  {4, 8, 16},                     /* cost of storing SSE registers
                                     in SImode, DImode and TImode */
  3,                              /* MMX or SSE register to integer */
  0,                              /* size of l1 cache */
  0,                              /* size of l2 cache */
  0,                              /* size of prefetch block */
  0,                              /* number of parallel prefetches */
  1,                              /* Branch cost */
  COSTS_N_INSNS (23),             /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27),             /* cost of FMUL instruction.  */
  COSTS_N_INSNS (88),             /* cost of FDIV instruction.  */
  COSTS_N_INSNS (22),             /* cost of FABS instruction.  */
  COSTS_N_INSNS (24),             /* cost of FCHS instruction.  */
  COSTS_N_INSNS (122),            /* cost of FSQRT instruction.  */
  i386_memcpy,
  i386_memset,
  1,                              /* scalar_stmt_cost.  */
  1,                              /* scalar load_cost.  */
  1,                              /* scalar_store_cost.  */
  1,                              /* vec_stmt_cost.  */
  1,                              /* vec_to_scalar_cost.  */
  1,                              /* scalar_to_vec_cost.  */
  1,                              /* vec_align_load_cost.  */
  2,                              /* vec_unalign_load_cost.  */
  1,                              /* vec_store_cost.  */
  3,                              /* cond_taken_branch_cost.  */
  1,                              /* cond_not_taken_branch_cost.  */
};

static stringop_algs i486_memcpy[2] = {
  {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs i486_memset[2] = {
  {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
  DUMMY_STRINGOP_ALGS};

static const
struct processor_costs i486_cost = {    /* 486 specific costs */
  COSTS_N_INSNS (1),              /* cost of an add instruction */
  COSTS_N_INSNS (1),              /* cost of a lea instruction */
  COSTS_N_INSNS (3),              /* variable shift costs */
  COSTS_N_INSNS (2),              /* constant shift costs */
  {COSTS_N_INSNS (12),            /* cost of starting multiply for QI */
   COSTS_N_INSNS (12),            /* HI */
   COSTS_N_INSNS (12),            /* SI */
   COSTS_N_INSNS (12),            /* DI */
   COSTS_N_INSNS (12)},           /* other */
  1,                              /* cost of multiply per each bit set */
  {COSTS_N_INSNS (40),            /* cost of a divide/mod for QI */
   COSTS_N_INSNS (40),            /* HI */
   COSTS_N_INSNS (40),            /* SI */
   COSTS_N_INSNS (40),            /* DI */
   COSTS_N_INSNS (40)},           /* other */
  COSTS_N_INSNS (3),              /* cost of movsx */
  COSTS_N_INSNS (2),              /* cost of movzx */
  15,                             /* "large" insn */
  3,                              /* MOVE_RATIO */
  4,                              /* cost for loading QImode using movzbl */
  {2, 4, 2},                      /* cost of loading integer registers
                                     in QImode, HImode and SImode.
                                     Relative to reg-reg move (2).  */
  {2, 4, 2},                      /* cost of storing integer registers */
  2,                              /* cost of reg,reg fld/fst */
  {8, 8, 8},                      /* cost of loading fp registers
                                     in SFmode, DFmode and XFmode */
  {8, 8, 8},                      /* cost of storing fp registers
                                     in SFmode, DFmode and XFmode */
  2,                              /* cost of moving MMX register */
  {4, 8},                         /* cost of loading MMX registers
                                     in SImode and DImode */
  {4, 8},                         /* cost of storing MMX registers
                                     in SImode and DImode */
  2,                              /* cost of moving SSE register */
  {4, 8, 16},                     /* cost of loading SSE registers
                                     in SImode, DImode and TImode */
  {4, 8, 16},                     /* cost of storing SSE registers
                                     in SImode, DImode and TImode */
  3,                              /* MMX or SSE register to integer */
  4,                              /* size of l1 cache.  486 has 8kB cache
                                     shared for code and data, so 4kB is
                                     not really precise.  */
  4,                              /* size of l2 cache */
  0,                              /* size of prefetch block */
  0,                              /* number of parallel prefetches */
  1,                              /* Branch cost */
  COSTS_N_INSNS (8),              /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16),             /* cost of FMUL instruction.  */
  COSTS_N_INSNS (73),             /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),              /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),              /* cost of FCHS instruction.  */
  COSTS_N_INSNS (83),             /* cost of FSQRT instruction.  */
  i486_memcpy,
  i486_memset,
  1,                              /* scalar_stmt_cost.  */
  1,                              /* scalar load_cost.  */
  1,                              /* scalar_store_cost.  */
  1,                              /* vec_stmt_cost.  */
  1,                              /* vec_to_scalar_cost.  */
  1,                              /* scalar_to_vec_cost.  */
  1,                              /* vec_align_load_cost.  */
  2,                              /* vec_unalign_load_cost.  */
  1,                              /* vec_store_cost.  */
  3,                              /* cond_taken_branch_cost.  */
  1,                              /* cond_not_taken_branch_cost.  */
};

static stringop_algs pentium_memcpy[2] = {
  {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs pentium_memset[2] = {
  {libcall, {{-1, rep_prefix_4_byte, false}}},
  DUMMY_STRINGOP_ALGS};

static const
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),              /* cost of an add instruction */
  COSTS_N_INSNS (1),              /* cost of a lea instruction */
  COSTS_N_INSNS (4),              /* variable shift costs */
  COSTS_N_INSNS (1),              /* constant shift costs */
  {COSTS_N_INSNS (11),            /* cost of starting multiply for QI */
   COSTS_N_INSNS (11),            /* HI */
   COSTS_N_INSNS (11),            /* SI */
   COSTS_N_INSNS (11),            /* DI */
   COSTS_N_INSNS (11)},           /* other */
  0,                              /* cost of multiply per each bit set */
  {COSTS_N_INSNS (25),            /* cost of a divide/mod for QI */
   COSTS_N_INSNS (25),            /* HI */
   COSTS_N_INSNS (25),            /* SI */
   COSTS_N_INSNS (25),            /* DI */
   COSTS_N_INSNS (25)},           /* other */
  COSTS_N_INSNS (3),              /* cost of movsx */
  COSTS_N_INSNS (2),              /* cost of movzx */
  8,                              /* "large" insn */
  6,                              /* MOVE_RATIO */
  6,                              /* cost for loading QImode using movzbl */
  {2, 4, 2},                      /* cost of loading integer registers
                                     in QImode, HImode and SImode.
                                     Relative to reg-reg move (2).  */
  {2, 4, 2},                      /* cost of storing integer registers */
  2,                              /* cost of reg,reg fld/fst */
  {2, 2, 6},                      /* cost of loading fp registers
                                     in SFmode, DFmode and XFmode */
  {4, 4, 6},                      /* cost of storing fp registers
                                     in SFmode, DFmode and XFmode */
  8,                              /* cost of moving MMX register */
  {8, 8},                         /* cost of loading MMX registers
                                     in SImode and DImode */
  {8, 8},                         /* cost of storing MMX registers
                                     in SImode and DImode */
  2,                              /* cost of moving SSE register */
  {4, 8, 16},                     /* cost of loading SSE registers
                                     in SImode, DImode and TImode */
  {4, 8, 16},                     /* cost of storing SSE registers
                                     in SImode, DImode and TImode */
  3,                              /* MMX or SSE register to integer */
  8,                              /* size of l1 cache.  */
  8,                              /* size of l2 cache */
  0,                              /* size of prefetch block */
  0,                              /* number of parallel prefetches */
  2,                              /* Branch cost */
  COSTS_N_INSNS (3),              /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3),              /* cost of FMUL instruction.  */
  COSTS_N_INSNS (39),             /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),              /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),              /* cost of FCHS instruction.  */
  COSTS_N_INSNS (70),             /* cost of FSQRT instruction.  */
  pentium_memcpy,
  pentium_memset,
  1,                              /* scalar_stmt_cost.  */
  1,                              /* scalar load_cost.  */
  1,                              /* scalar_store_cost.  */
  1,                              /* vec_stmt_cost.  */
  1,                              /* vec_to_scalar_cost.  */
  1,                              /* scalar_to_vec_cost.  */
  1,                              /* vec_align_load_cost.  */
  2,                              /* vec_unalign_load_cost.  */
  1,                              /* vec_store_cost.  */
  3,                              /* cond_taken_branch_cost.  */
  1,                              /* cond_not_taken_branch_cost.  */
};

/* PentiumPro has optimized rep instructions for blocks aligned to 8 bytes
   (we ensure the alignment).  For small blocks an inline loop is still a
   noticeable win; for bigger blocks either rep movsl or rep movsb is the
   way to go.  Rep movsb apparently has a more expensive startup time in the
   CPU, but after 4K the difference is down in the noise.  */
static stringop_algs pentiumpro_memcpy[2] = {
  {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
                       {8192, rep_prefix_4_byte, false},
                       {-1, rep_prefix_1_byte, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs pentiumpro_memset[2] = {
  {rep_prefix_4_byte, {{1024, unrolled_loop, false},
                       {8192, rep_prefix_4_byte, false},
                       {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
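
/* Reading the table above (added illustration, not in the original source):
   for a 32-bit memcpy on PentiumPro, a 100-byte block falls under the
   {128, loop, false} entry and is expanded as an inline loop, a 4000-byte
   block matches {8192, rep_prefix_4_byte, false} and uses rep movsl, and
   anything larger falls through to the {-1, rep_prefix_1_byte, false}
   catch-all.  The concrete byte counts here are hypothetical examples.  */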
static const
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),              /* cost of an add instruction */
  COSTS_N_INSNS (1),              /* cost of a lea instruction */
  COSTS_N_INSNS (1),              /* variable shift costs */
  COSTS_N_INSNS (1),              /* constant shift costs */
  {COSTS_N_INSNS (4),             /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),             /* HI */
   COSTS_N_INSNS (4),             /* SI */
   COSTS_N_INSNS (4),             /* DI */
   COSTS_N_INSNS (4)},            /* other */
  0,                              /* cost of multiply per each bit set */
  {COSTS_N_INSNS (17),            /* cost of a divide/mod for QI */
   COSTS_N_INSNS (17),            /* HI */
   COSTS_N_INSNS (17),            /* SI */
   COSTS_N_INSNS (17),            /* DI */
   COSTS_N_INSNS (17)},           /* other */
  COSTS_N_INSNS (1),              /* cost of movsx */
  COSTS_N_INSNS (1),              /* cost of movzx */
  8,                              /* "large" insn */
  6,                              /* MOVE_RATIO */
  2,                              /* cost for loading QImode using movzbl */
  {4, 4, 4},                      /* cost of loading integer registers
                                     in QImode, HImode and SImode.
                                     Relative to reg-reg move (2).  */
  {2, 2, 2},                      /* cost of storing integer registers */
  2,                              /* cost of reg,reg fld/fst */
  {2, 2, 6},                      /* cost of loading fp registers
                                     in SFmode, DFmode and XFmode */
  {4, 4, 6},                      /* cost of storing fp registers
                                     in SFmode, DFmode and XFmode */
  2,                              /* cost of moving MMX register */
  {2, 2},                         /* cost of loading MMX registers
                                     in SImode and DImode */
  {2, 2},                         /* cost of storing MMX registers
                                     in SImode and DImode */
  2,                              /* cost of moving SSE register */
  {2, 2, 8},                      /* cost of loading SSE registers
                                     in SImode, DImode and TImode */
  {2, 2, 8},                      /* cost of storing SSE registers
                                     in SImode, DImode and TImode */
  3,                              /* MMX or SSE register to integer */
  8,                              /* size of l1 cache.  */
  256,                            /* size of l2 cache */
  32,                             /* size of prefetch block */
  6,                              /* number of parallel prefetches */
  2,                              /* Branch cost */
  COSTS_N_INSNS (3),              /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),              /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),             /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),              /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),              /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),             /* cost of FSQRT instruction.  */
  pentiumpro_memcpy,
  pentiumpro_memset,
  1,                              /* scalar_stmt_cost.  */
  1,                              /* scalar load_cost.  */
  1,                              /* scalar_store_cost.  */
  1,                              /* vec_stmt_cost.  */
  1,                              /* vec_to_scalar_cost.  */
  1,                              /* scalar_to_vec_cost.  */
  1,                              /* vec_align_load_cost.  */
  2,                              /* vec_unalign_load_cost.  */
  1,                              /* vec_store_cost.  */
  3,                              /* cond_taken_branch_cost.  */
  1,                              /* cond_not_taken_branch_cost.  */
};

static stringop_algs geode_memcpy[2] = {
  {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs geode_memset[2] = {
  {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
static const
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),              /* cost of an add instruction */
  COSTS_N_INSNS (1),              /* cost of a lea instruction */
  COSTS_N_INSNS (2),              /* variable shift costs */
  COSTS_N_INSNS (1),              /* constant shift costs */
  {COSTS_N_INSNS (3),             /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),             /* HI */
   COSTS_N_INSNS (7),             /* SI */
   COSTS_N_INSNS (7),             /* DI */
   COSTS_N_INSNS (7)},            /* other */
  0,                              /* cost of multiply per each bit set */
  {COSTS_N_INSNS (15),            /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),            /* HI */
   COSTS_N_INSNS (39),            /* SI */
   COSTS_N_INSNS (39),            /* DI */
   COSTS_N_INSNS (39)},           /* other */
  COSTS_N_INSNS (1),              /* cost of movsx */
  COSTS_N_INSNS (1),              /* cost of movzx */
  8,                              /* "large" insn */
  4,                              /* MOVE_RATIO */
  1,                              /* cost for loading QImode using movzbl */
  {1, 1, 1},                      /* cost of loading integer registers
                                     in QImode, HImode and SImode.
                                     Relative to reg-reg move (2).  */
  {1, 1, 1},                      /* cost of storing integer registers */
  1,                              /* cost of reg,reg fld/fst */
  {1, 1, 1},                      /* cost of loading fp registers
                                     in SFmode, DFmode and XFmode */
  {4, 6, 6},                      /* cost of storing fp registers
                                     in SFmode, DFmode and XFmode */

  1,                              /* cost of moving MMX register */
  {1, 1},                         /* cost of loading MMX registers
                                     in SImode and DImode */
  {1, 1},                         /* cost of storing MMX registers
                                     in SImode and DImode */
  1,                              /* cost of moving SSE register */
  {1, 1, 1},                      /* cost of loading SSE registers
                                     in SImode, DImode and TImode */
  {1, 1, 1},                      /* cost of storing SSE registers
                                     in SImode, DImode and TImode */
  1,                              /* MMX or SSE register to integer */
  64,                             /* size of l1 cache.  */
  128,                            /* size of l2 cache.  */
  32,                             /* size of prefetch block */
  1,                              /* number of parallel prefetches */
  1,                              /* Branch cost */
  COSTS_N_INSNS (6),              /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (11),             /* cost of FMUL instruction.  */
  COSTS_N_INSNS (47),             /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),              /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),              /* cost of FCHS instruction.  */
  COSTS_N_INSNS (54),             /* cost of FSQRT instruction.  */
  geode_memcpy,
  geode_memset,
  1,                              /* scalar_stmt_cost.  */
  1,                              /* scalar load_cost.  */
  1,                              /* scalar_store_cost.  */
  1,                              /* vec_stmt_cost.  */
  1,                              /* vec_to_scalar_cost.  */
  1,                              /* scalar_to_vec_cost.  */
  1,                              /* vec_align_load_cost.  */
  2,                              /* vec_unalign_load_cost.  */
  1,                              /* vec_store_cost.  */
  3,                              /* cond_taken_branch_cost.  */
  1,                              /* cond_not_taken_branch_cost.  */
};

static stringop_algs k6_memcpy[2] = {
  {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs k6_memset[2] = {
  {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
static const
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),              /* cost of an add instruction */
  COSTS_N_INSNS (2),              /* cost of a lea instruction */
  COSTS_N_INSNS (1),              /* variable shift costs */
  COSTS_N_INSNS (1),              /* constant shift costs */
  {COSTS_N_INSNS (3),             /* cost of starting multiply for QI */
   COSTS_N_INSNS (3),             /* HI */
   COSTS_N_INSNS (3),             /* SI */
   COSTS_N_INSNS (3),             /* DI */
   COSTS_N_INSNS (3)},            /* other */
  0,                              /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),            /* cost of a divide/mod for QI */
   COSTS_N_INSNS (18),            /* HI */
   COSTS_N_INSNS (18),            /* SI */
   COSTS_N_INSNS (18),            /* DI */
   COSTS_N_INSNS (18)},           /* other */
  COSTS_N_INSNS (2),              /* cost of movsx */
  COSTS_N_INSNS (2),              /* cost of movzx */
  8,                              /* "large" insn */
  4,                              /* MOVE_RATIO */
  3,                              /* cost for loading QImode using movzbl */
  {4, 5, 4},                      /* cost of loading integer registers
                                     in QImode, HImode and SImode.
                                     Relative to reg-reg move (2).  */
  {2, 3, 2},                      /* cost of storing integer registers */
  4,                              /* cost of reg,reg fld/fst */
  {6, 6, 6},                      /* cost of loading fp registers
                                     in SFmode, DFmode and XFmode */
  {4, 4, 4},                      /* cost of storing fp registers
                                     in SFmode, DFmode and XFmode */
  2,                              /* cost of moving MMX register */
  {2, 2},                         /* cost of loading MMX registers
                                     in SImode and DImode */
  {2, 2},                         /* cost of storing MMX registers
                                     in SImode and DImode */
  2,                              /* cost of moving SSE register */
  {2, 2, 8},                      /* cost of loading SSE registers
                                     in SImode, DImode and TImode */
  {2, 2, 8},                      /* cost of storing SSE registers
                                     in SImode, DImode and TImode */
  6,                              /* MMX or SSE register to integer */
  32,                             /* size of l1 cache.  */
  32,                             /* size of l2 cache.  Some models
                                     have integrated l2 cache, but
                                     optimizing for k6 is not important
                                     enough to worry about that.  */
  32,                             /* size of prefetch block */
  1,                              /* number of parallel prefetches */
  1,                              /* Branch cost */
  COSTS_N_INSNS (2),              /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2),              /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),             /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),              /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),              /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),             /* cost of FSQRT instruction.  */
  k6_memcpy,
  k6_memset,
  1,                              /* scalar_stmt_cost.  */
  1,                              /* scalar load_cost.  */
  1,                              /* scalar_store_cost.  */
  1,                              /* vec_stmt_cost.  */
  1,                              /* vec_to_scalar_cost.  */
  1,                              /* scalar_to_vec_cost.  */
  1,                              /* vec_align_load_cost.  */
  2,                              /* vec_unalign_load_cost.  */
  1,                              /* vec_store_cost.  */
  3,                              /* cond_taken_branch_cost.  */
  1,                              /* cond_not_taken_branch_cost.  */
};

/* For some reason, Athlon deals better with REP prefix (relative to loops)
   compared to K8.  Alignment becomes important after 8 bytes for memcpy and
   128 bytes for memset.  */
static stringop_algs athlon_memcpy[2] = {
  {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs athlon_memset[2] = {
  {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
static const
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),              /* cost of an add instruction */
  COSTS_N_INSNS (2),              /* cost of a lea instruction */
  COSTS_N_INSNS (1),              /* variable shift costs */
  COSTS_N_INSNS (1),              /* constant shift costs */
  {COSTS_N_INSNS (5),             /* cost of starting multiply for QI */
   COSTS_N_INSNS (5),             /* HI */
   COSTS_N_INSNS (5),             /* SI */
   COSTS_N_INSNS (5),             /* DI */
   COSTS_N_INSNS (5)},            /* other */
  0,                              /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),            /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),            /* HI */
   COSTS_N_INSNS (42),            /* SI */
   COSTS_N_INSNS (74),            /* DI */
   COSTS_N_INSNS (74)},           /* other */
  COSTS_N_INSNS (1),              /* cost of movsx */
  COSTS_N_INSNS (1),              /* cost of movzx */
  8,                              /* "large" insn */
  9,                              /* MOVE_RATIO */
  4,                              /* cost for loading QImode using movzbl */
  {3, 4, 3},                      /* cost of loading integer registers
                                     in QImode, HImode and SImode.
                                     Relative to reg-reg move (2).  */
  {3, 4, 3},                      /* cost of storing integer registers */
  4,                              /* cost of reg,reg fld/fst */
  {4, 4, 12},                     /* cost of loading fp registers
                                     in SFmode, DFmode and XFmode */
  {6, 6, 8},                      /* cost of storing fp registers
                                     in SFmode, DFmode and XFmode */
  2,                              /* cost of moving MMX register */
  {4, 4},                         /* cost of loading MMX registers
                                     in SImode and DImode */
  {4, 4},                         /* cost of storing MMX registers
                                     in SImode and DImode */
  2,                              /* cost of moving SSE register */
  {4, 4, 6},                      /* cost of loading SSE registers
                                     in SImode, DImode and TImode */
  {4, 4, 5},                      /* cost of storing SSE registers
                                     in SImode, DImode and TImode */
  5,                              /* MMX or SSE register to integer */
  64,                             /* size of l1 cache.  */
  256,                            /* size of l2 cache.  */
  64,                             /* size of prefetch block */
  6,                              /* number of parallel prefetches */
  5,                              /* Branch cost */
  COSTS_N_INSNS (4),              /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),              /* cost of FMUL instruction.  */
  COSTS_N_INSNS (24),             /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),              /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),              /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),             /* cost of FSQRT instruction.  */
  athlon_memcpy,
  athlon_memset,
  1,                              /* scalar_stmt_cost.  */
  1,                              /* scalar load_cost.  */
  1,                              /* scalar_store_cost.  */
  1,                              /* vec_stmt_cost.  */
  1,                              /* vec_to_scalar_cost.  */
  1,                              /* scalar_to_vec_cost.  */
  1,                              /* vec_align_load_cost.  */
  2,                              /* vec_unalign_load_cost.  */
  1,                              /* vec_store_cost.  */
  3,                              /* cond_taken_branch_cost.  */
  1,                              /* cond_not_taken_branch_cost.  */
};

/* K8 has an optimized REP instruction for medium-sized blocks, but for very
   small blocks it is better to use a loop.  For large blocks, a libcall can
   do non-temporal accesses and beat inline code considerably.  */
static stringop_algs k8_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs k8_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false},
             {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
static const
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),              /* cost of an add instruction */
  COSTS_N_INSNS (2),              /* cost of a lea instruction */
  COSTS_N_INSNS (1),              /* variable shift costs */
  COSTS_N_INSNS (1),              /* constant shift costs */
  {COSTS_N_INSNS (3),             /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),             /* HI */
   COSTS_N_INSNS (3),             /* SI */
   COSTS_N_INSNS (4),             /* DI */
   COSTS_N_INSNS (5)},            /* other */
  0,                              /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),            /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),            /* HI */
   COSTS_N_INSNS (42),            /* SI */
   COSTS_N_INSNS (74),            /* DI */
   COSTS_N_INSNS (74)},           /* other */
  COSTS_N_INSNS (1),              /* cost of movsx */
  COSTS_N_INSNS (1),              /* cost of movzx */
  8,                              /* "large" insn */
  9,                              /* MOVE_RATIO */
  4,                              /* cost for loading QImode using movzbl */
  {3, 4, 3},                      /* cost of loading integer registers
                                     in QImode, HImode and SImode.
                                     Relative to reg-reg move (2).  */
  {3, 4, 3},                      /* cost of storing integer registers */
  4,                              /* cost of reg,reg fld/fst */
  {4, 4, 12},                     /* cost of loading fp registers
                                     in SFmode, DFmode and XFmode */
  {6, 6, 8},                      /* cost of storing fp registers
                                     in SFmode, DFmode and XFmode */
  2,                              /* cost of moving MMX register */
  {3, 3},                         /* cost of loading MMX registers
                                     in SImode and DImode */
  {4, 4},                         /* cost of storing MMX registers
                                     in SImode and DImode */
  2,                              /* cost of moving SSE register */
  {4, 3, 6},                      /* cost of loading SSE registers
                                     in SImode, DImode and TImode */
  {4, 4, 5},                      /* cost of storing SSE registers
                                     in SImode, DImode and TImode */
  5,                              /* MMX or SSE register to integer */
  64,                             /* size of l1 cache.  */
  512,                            /* size of l2 cache.  */
  64,                             /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                            /* number of parallel prefetches */
  3,                              /* Branch cost */
  COSTS_N_INSNS (4),              /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),              /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),             /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),              /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),              /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),             /* cost of FSQRT instruction.  */

  k8_memcpy,
  k8_memset,
  4,                              /* scalar_stmt_cost.  */
  2,                              /* scalar load_cost.  */
  2,                              /* scalar_store_cost.  */
  5,                              /* vec_stmt_cost.  */
  0,                              /* vec_to_scalar_cost.  */
  2,                              /* scalar_to_vec_cost.  */
  2,                              /* vec_align_load_cost.  */
  3,                              /* vec_unalign_load_cost.  */
  3,                              /* vec_store_cost.  */
  3,                              /* cond_taken_branch_cost.  */
  2,                              /* cond_not_taken_branch_cost.  */
};

/* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but for
   very small blocks it is better to use a loop.  For large blocks, a libcall
   can do non-temporal accesses and beat inline code considerably.  */
static stringop_algs amdfam10_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs amdfam10_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1),              /* cost of an add instruction */
  COSTS_N_INSNS (2),              /* cost of a lea instruction */
  COSTS_N_INSNS (1),              /* variable shift costs */
  COSTS_N_INSNS (1),              /* constant shift costs */
  {COSTS_N_INSNS (3),             /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),             /* HI */
   COSTS_N_INSNS (3),             /* SI */
   COSTS_N_INSNS (4),             /* DI */
   COSTS_N_INSNS (5)},            /* other */
  0,                              /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),            /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),            /* HI */
   COSTS_N_INSNS (51),            /* SI */
   COSTS_N_INSNS (83),            /* DI */
   COSTS_N_INSNS (83)},           /* other */
  COSTS_N_INSNS (1),              /* cost of movsx */
  COSTS_N_INSNS (1),              /* cost of movzx */
  8,                              /* "large" insn */
  9,                              /* MOVE_RATIO */
  4,                              /* cost for loading QImode using movzbl */
  {3, 4, 3},                      /* cost of loading integer registers
                                     in QImode, HImode and SImode.
                                     Relative to reg-reg move (2).  */
  {3, 4, 3},                      /* cost of storing integer registers */
  4,                              /* cost of reg,reg fld/fst */
  {4, 4, 12},                     /* cost of loading fp registers
                                     in SFmode, DFmode and XFmode */
  {6, 6, 8},                      /* cost of storing fp registers
                                     in SFmode, DFmode and XFmode */
  2,                              /* cost of moving MMX register */
  {3, 3},                         /* cost of loading MMX registers
                                     in SImode and DImode */
  {4, 4},                         /* cost of storing MMX registers
                                     in SImode and DImode */
  2,                              /* cost of moving SSE register */
  {4, 4, 3},                      /* cost of loading SSE registers
                                     in SImode, DImode and TImode */
  {4, 4, 5},                      /* cost of storing SSE registers
                                     in SImode, DImode and TImode */
  3,                              /* MMX or SSE register to integer */
                                  /* On K8:
                                      MOVD reg64, xmmreg Double FSTORE 4
                                      MOVD reg32, xmmreg Double FSTORE 4
                                     On AMDFAM10:
                                      MOVD reg64, xmmreg Double FADD 3
                                                         1/1  1/1
                                      MOVD reg32, xmmreg Double FADD 3
                                                         1/1  1/1 */
  64,                             /* size of l1 cache.  */
  512,                            /* size of l2 cache.  */
  64,                             /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                            /* number of parallel prefetches */
  2,                              /* Branch cost */
  COSTS_N_INSNS (4),              /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),              /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),             /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),              /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),              /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),             /* cost of FSQRT instruction.  */

  amdfam10_memcpy,
  amdfam10_memset,
  4,                              /* scalar_stmt_cost.  */
  2,                              /* scalar load_cost.  */
  2,                              /* scalar_store_cost.  */
  6,                              /* vec_stmt_cost.  */
  0,                              /* vec_to_scalar_cost.  */
  2,                              /* scalar_to_vec_cost.  */
  2,                              /* vec_align_load_cost.  */
  2,                              /* vec_unalign_load_cost.  */
  2,                              /* vec_store_cost.  */
  2,                              /* cond_taken_branch_cost.  */
  1,                              /* cond_not_taken_branch_cost.  */
};

/* BDVER1 has an optimized REP instruction for medium-sized blocks, but for
   very small blocks it is better to use a loop.  For large blocks, a libcall
   can do non-temporal accesses and beat inline code considerably.  */
static stringop_algs bdver1_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs bdver1_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};

const struct processor_costs bdver1_cost = {
  COSTS_N_INSNS (1),              /* cost of an add instruction */
  COSTS_N_INSNS (1),              /* cost of a lea instruction */
  COSTS_N_INSNS (1),              /* variable shift costs */
  COSTS_N_INSNS (1),              /* constant shift costs */
  {COSTS_N_INSNS (4),             /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),             /* HI */
   COSTS_N_INSNS (4),             /* SI */
   COSTS_N_INSNS (6),             /* DI */
   COSTS_N_INSNS (6)},            /* other */
  0,                              /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),            /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),            /* HI */
   COSTS_N_INSNS (51),            /* SI */
   COSTS_N_INSNS (83),            /* DI */
   COSTS_N_INSNS (83)},           /* other */
  COSTS_N_INSNS (1),              /* cost of movsx */
  COSTS_N_INSNS (1),              /* cost of movzx */
  8,                              /* "large" insn */
  9,                              /* MOVE_RATIO */
  4,                              /* cost for loading QImode using movzbl */
  {5, 5, 4},                      /* cost of loading integer registers
                                     in QImode, HImode and SImode.
                                     Relative to reg-reg move (2).  */
  {4, 4, 4},                      /* cost of storing integer registers */
  2,                              /* cost of reg,reg fld/fst */
  {5, 5, 12},                     /* cost of loading fp registers
                                     in SFmode, DFmode and XFmode */
  {4, 4, 8},                      /* cost of storing fp registers
                                     in SFmode, DFmode and XFmode */
  2,                              /* cost of moving MMX register */
  {4, 4},                         /* cost of loading MMX registers
                                     in SImode and DImode */
  {4, 4},                         /* cost of storing MMX registers
                                     in SImode and DImode */
  2,                              /* cost of moving SSE register */
  {4, 4, 4},                      /* cost of loading SSE registers
                                     in SImode, DImode and TImode */
  {4, 4, 4},                      /* cost of storing SSE registers
                                     in SImode, DImode and TImode */
  2,                              /* MMX or SSE register to integer */
                                  /* On K8:
                                      MOVD reg64, xmmreg Double FSTORE 4
                                      MOVD reg32, xmmreg Double FSTORE 4
                                     On AMDFAM10:
                                      MOVD reg64, xmmreg Double FADD 3
                                                         1/1  1/1
                                      MOVD reg32, xmmreg Double FADD 3
                                                         1/1  1/1 */
  16,                             /* size of l1 cache.  */
  2048,                           /* size of l2 cache.  */
  64,                             /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                            /* number of parallel prefetches */
  2,                              /* Branch cost */
  COSTS_N_INSNS (6),              /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),              /* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),             /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),              /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),              /* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),             /* cost of FSQRT instruction.  */

  bdver1_memcpy,
  bdver1_memset,
  6,                              /* scalar_stmt_cost.  */
  4,                              /* scalar load_cost.  */
  4,                              /* scalar_store_cost.  */
  6,                              /* vec_stmt_cost.  */
  0,                              /* vec_to_scalar_cost.  */
  2,                              /* scalar_to_vec_cost.  */
  4,                              /* vec_align_load_cost.  */
  4,                              /* vec_unalign_load_cost.  */
  4,                              /* vec_store_cost.  */
  2,                              /* cond_taken_branch_cost.  */
  1,                              /* cond_not_taken_branch_cost.  */
};

/* BDVER2 has an optimized REP instruction for medium-sized blocks, but for
   very small blocks it is better to use a loop.  For large blocks, a libcall
   can do non-temporal accesses and beat inline code considerably.  */

static stringop_algs bdver2_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs bdver2_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};

const struct processor_costs bdver2_cost = {
  COSTS_N_INSNS (1),              /* cost of an add instruction */
  COSTS_N_INSNS (1),              /* cost of a lea instruction */
  COSTS_N_INSNS (1),              /* variable shift costs */
  COSTS_N_INSNS (1),              /* constant shift costs */
  {COSTS_N_INSNS (4),             /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),             /* HI */
   COSTS_N_INSNS (4),             /* SI */
   COSTS_N_INSNS (6),             /* DI */
   COSTS_N_INSNS (6)},            /* other */
  0,                              /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),            /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),            /* HI */
   COSTS_N_INSNS (51),            /* SI */
   COSTS_N_INSNS (83),            /* DI */
   COSTS_N_INSNS (83)},           /* other */
  COSTS_N_INSNS (1),              /* cost of movsx */
  COSTS_N_INSNS (1),              /* cost of movzx */
  8,                              /* "large" insn */
  9,                              /* MOVE_RATIO */
  4,                              /* cost for loading QImode using movzbl */
  {5, 5, 4},                      /* cost of loading integer registers
                                     in QImode, HImode and SImode.
                                     Relative to reg-reg move (2).  */
  {4, 4, 4},                      /* cost of storing integer registers */
  2,                              /* cost of reg,reg fld/fst */
  {5, 5, 12},                     /* cost of loading fp registers
                                     in SFmode, DFmode and XFmode */
  {4, 4, 8},                      /* cost of storing fp registers
                                     in SFmode, DFmode and XFmode */
  2,                              /* cost of moving MMX register */
  {4, 4},                         /* cost of loading MMX registers
                                     in SImode and DImode */
  {4, 4},                         /* cost of storing MMX registers
                                     in SImode and DImode */
  2,                              /* cost of moving SSE register */
  {4, 4, 4},                      /* cost of loading SSE registers
                                     in SImode, DImode and TImode */
  {4, 4, 4},                      /* cost of storing SSE registers
                                     in SImode, DImode and TImode */
  2,                              /* MMX or SSE register to integer */
                                  /* On K8:
                                      MOVD reg64, xmmreg Double FSTORE 4
                                      MOVD reg32, xmmreg Double FSTORE 4
                                     On AMDFAM10:
                                      MOVD reg64, xmmreg Double FADD 3
                                                         1/1  1/1
                                      MOVD reg32, xmmreg Double FADD 3
                                                         1/1  1/1 */
  16,                             /* size of l1 cache.  */
  2048,                           /* size of l2 cache.  */
  64,                             /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                            /* number of parallel prefetches */
  2,                              /* Branch cost */
  COSTS_N_INSNS (6),              /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),              /* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),             /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),              /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),              /* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),             /* cost of FSQRT instruction.  */

  bdver2_memcpy,
  bdver2_memset,
  6,                              /* scalar_stmt_cost.  */
  4,                              /* scalar load_cost.  */
  4,                              /* scalar_store_cost.  */
  6,                              /* vec_stmt_cost.  */
  0,                              /* vec_to_scalar_cost.  */
  2,                              /* scalar_to_vec_cost.  */
  4,                              /* vec_align_load_cost.  */
  4,                              /* vec_unalign_load_cost.  */
  4,                              /* vec_store_cost.  */
  2,                              /* cond_taken_branch_cost.  */
  1,                              /* cond_not_taken_branch_cost.  */
};

/* BDVER3 has an optimized REP instruction for medium-sized blocks, but for
   very small blocks it is better to use a loop.  For large blocks, a libcall
   can do non-temporal accesses and beat inline code considerably.  */
static stringop_algs bdver3_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs bdver3_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
struct processor_costs bdver3_cost = {
  COSTS_N_INSNS (1),              /* cost of an add instruction */
  COSTS_N_INSNS (1),              /* cost of a lea instruction */
  COSTS_N_INSNS (1),              /* variable shift costs */
  COSTS_N_INSNS (1),              /* constant shift costs */
  {COSTS_N_INSNS (4),             /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),             /* HI */
   COSTS_N_INSNS (4),             /* SI */
   COSTS_N_INSNS (6),             /* DI */
   COSTS_N_INSNS (6)},            /* other */
  0,                              /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),            /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),            /* HI */
   COSTS_N_INSNS (51),            /* SI */
   COSTS_N_INSNS (83),            /* DI */
   COSTS_N_INSNS (83)},           /* other */
  COSTS_N_INSNS (1),              /* cost of movsx */
  COSTS_N_INSNS (1),              /* cost of movzx */
  8,                              /* "large" insn */
  9,                              /* MOVE_RATIO */
  4,                              /* cost for loading QImode using movzbl */
  {5, 5, 4},                      /* cost of loading integer registers
                                     in QImode, HImode and SImode.
                                     Relative to reg-reg move (2).  */
  {4, 4, 4},                      /* cost of storing integer registers */
  2,                              /* cost of reg,reg fld/fst */
  {5, 5, 12},                     /* cost of loading fp registers
                                     in SFmode, DFmode and XFmode */
  {4, 4, 8},                      /* cost of storing fp registers
                                     in SFmode, DFmode and XFmode */
  2,                              /* cost of moving MMX register */
  {4, 4},                         /* cost of loading MMX registers
                                     in SImode and DImode */
  {4, 4},                         /* cost of storing MMX registers
                                     in SImode and DImode */
  2,                              /* cost of moving SSE register */
  {4, 4, 4},                      /* cost of loading SSE registers
                                     in SImode, DImode and TImode */
  {4, 4, 4},                      /* cost of storing SSE registers
                                     in SImode, DImode and TImode */
  2,                              /* MMX or SSE register to integer */
  16,                             /* size of l1 cache.  */
  2048,                           /* size of l2 cache.  */
  64,                             /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                            /* number of parallel prefetches */
  2,                              /* Branch cost */
  COSTS_N_INSNS (6),              /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),              /* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),             /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),              /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),              /* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),             /* cost of FSQRT instruction.  */

  bdver3_memcpy,
  bdver3_memset,
  6,                              /* scalar_stmt_cost.  */
  4,                              /* scalar load_cost.  */
  4,                              /* scalar_store_cost.  */
  6,                              /* vec_stmt_cost.  */
  0,                              /* vec_to_scalar_cost.  */
  2,                              /* scalar_to_vec_cost.  */
  4,                              /* vec_align_load_cost.  */
  4,                              /* vec_unalign_load_cost.  */
  4,                              /* vec_store_cost.  */
  2,                              /* cond_taken_branch_cost.  */
  1,                              /* cond_not_taken_branch_cost.  */
};

/* BDVER4 has an optimized REP instruction for medium-sized blocks, but for
   very small blocks it is better to use a loop.  For large blocks, a libcall
   can do non-temporal accesses and beat inline code considerably.  */
static stringop_algs bdver4_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs bdver4_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
struct processor_costs bdver4_cost = {
  COSTS_N_INSNS (1),              /* cost of an add instruction */
  COSTS_N_INSNS (1),              /* cost of a lea instruction */
  COSTS_N_INSNS (1),              /* variable shift costs */
  COSTS_N_INSNS (1),              /* constant shift costs */
  {COSTS_N_INSNS (4),             /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),             /* HI */
   COSTS_N_INSNS (4),             /* SI */
   COSTS_N_INSNS (6),             /* DI */
   COSTS_N_INSNS (6)},            /* other */
  0,                              /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),            /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),            /* HI */
   COSTS_N_INSNS (51),            /* SI */
   COSTS_N_INSNS (83),            /* DI */
   COSTS_N_INSNS (83)},           /* other */
  COSTS_N_INSNS (1),              /* cost of movsx */
  COSTS_N_INSNS (1),              /* cost of movzx */
  8,                              /* "large" insn */
  9,                              /* MOVE_RATIO */
  4,                              /* cost for loading QImode using movzbl */
  {5, 5, 4},                      /* cost of loading integer registers
                                     in QImode, HImode and SImode.
                                     Relative to reg-reg move (2).  */
  {4, 4, 4},                      /* cost of storing integer registers */
  2,                              /* cost of reg,reg fld/fst */
  {5, 5, 12},                     /* cost of loading fp registers
                                     in SFmode, DFmode and XFmode */
  {4, 4, 8},                      /* cost of storing fp registers
                                     in SFmode, DFmode and XFmode */
  2,                              /* cost of moving MMX register */
  {4, 4},                         /* cost of loading MMX registers
                                     in SImode and DImode */
  {4, 4},                         /* cost of storing MMX registers
                                     in SImode and DImode */
  2,                              /* cost of moving SSE register */
  {4, 4, 4},                      /* cost of loading SSE registers
                                     in SImode, DImode and TImode */
  {4, 4, 4},                      /* cost of storing SSE registers
                                     in SImode, DImode and TImode */
  2,                              /* MMX or SSE register to integer */
  16,                             /* size of l1 cache.  */
  2048,                           /* size of l2 cache.  */
  64,                             /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                            /* number of parallel prefetches */
  2,                              /* Branch cost */
  COSTS_N_INSNS (6),              /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),              /* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),             /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),              /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),              /* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),             /* cost of FSQRT instruction.  */

  bdver4_memcpy,
  bdver4_memset,
  6,                              /* scalar_stmt_cost.  */
  4,                              /* scalar load_cost.  */
  4,                              /* scalar_store_cost.  */
  6,                              /* vec_stmt_cost.  */
  0,                              /* vec_to_scalar_cost.  */
  2,                              /* scalar_to_vec_cost.  */
  4,                              /* vec_align_load_cost.  */
  4,                              /* vec_unalign_load_cost.  */
  4,                              /* vec_store_cost.  */
  2,                              /* cond_taken_branch_cost.  */
  1,                              /* cond_not_taken_branch_cost.  */
};

/* BTVER1 has an optimized REP instruction for medium-sized blocks, but for
   very small blocks it is better to use a loop.  For large blocks, a libcall
   can do non-temporal accesses and beat inline code considerably.  */
static stringop_algs btver1_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs btver1_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
const struct processor_costs btver1_cost = {
  COSTS_N_INSNS (1),              /* cost of an add instruction */
  COSTS_N_INSNS (2),              /* cost of a lea instruction */
  COSTS_N_INSNS (1),              /* variable shift costs */
  COSTS_N_INSNS (1),              /* constant shift costs */
  {COSTS_N_INSNS (3),             /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),             /* HI */
   COSTS_N_INSNS (3),             /* SI */
   COSTS_N_INSNS (4),             /* DI */
   COSTS_N_INSNS (5)},            /* other */
  0,                              /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),            /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),            /* HI */
   COSTS_N_INSNS (51),            /* SI */
   COSTS_N_INSNS (83),            /* DI */
   COSTS_N_INSNS (83)},           /* other */
  COSTS_N_INSNS (1),              /* cost of movsx */
  COSTS_N_INSNS (1),              /* cost of movzx */
  8,                              /* "large" insn */
  9,                              /* MOVE_RATIO */
  4,                              /* cost for loading QImode using movzbl */
  {3, 4, 3},                      /* cost of loading integer registers
                                     in QImode, HImode and SImode.
                                     Relative to reg-reg move (2).  */
  {3, 4, 3},                      /* cost of storing integer registers */
  4,                              /* cost of reg,reg fld/fst */
  {4, 4, 12},                     /* cost of loading fp registers
                                     in SFmode, DFmode and XFmode */
  {6, 6, 8},                      /* cost of storing fp registers
                                     in SFmode, DFmode and XFmode */
  2,                              /* cost of moving MMX register */
  {3, 3},                         /* cost of loading MMX registers
                                     in SImode and DImode */
  {4, 4},                         /* cost of storing MMX registers
                                     in SImode and DImode */
  2,                              /* cost of moving SSE register */
  {4, 4, 3},                      /* cost of loading SSE registers
                                     in SImode, DImode and TImode */
  {4, 4, 5},                      /* cost of storing SSE registers
                                     in SImode, DImode and TImode */
  3,                              /* MMX or SSE register to integer */
                                  /* On K8:
                                      MOVD reg64, xmmreg Double FSTORE 4
                                      MOVD reg32, xmmreg Double FSTORE 4
                                     On AMDFAM10:
                                      MOVD reg64, xmmreg Double FADD 3
                                                         1/1  1/1
                                      MOVD reg32, xmmreg Double FADD 3
                                                         1/1  1/1 */
  32,                             /* size of l1 cache.  */
  512,                            /* size of l2 cache.  */
  64,                             /* size of prefetch block */
  100,                            /* number of parallel prefetches */
  2,                              /* Branch cost */
  COSTS_N_INSNS (4),              /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),              /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),             /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),              /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),              /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),             /* cost of FSQRT instruction.  */

  btver1_memcpy,
  btver1_memset,
  4,                              /* scalar_stmt_cost.  */
  2,                              /* scalar load_cost.  */
  2,                              /* scalar_store_cost.  */
  6,                              /* vec_stmt_cost.  */
  0,                              /* vec_to_scalar_cost.  */
  2,                              /* scalar_to_vec_cost.  */
  2,                              /* vec_align_load_cost.  */
  2,                              /* vec_unalign_load_cost.  */
  2,                              /* vec_store_cost.  */
  2,                              /* cond_taken_branch_cost.  */
  1,                              /* cond_not_taken_branch_cost.  */
};
1359 static stringop_algs btver2_memcpy[2] = {
1360 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1361 {-1, rep_prefix_4_byte, false}}},
1362 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1363 {-1, libcall, false}}}};
1364 static stringop_algs btver2_memset[2] = {
1365 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1366 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1367 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1368 {-1, libcall, false}}}};
1369 const struct processor_costs btver2_cost = {
1370 COSTS_N_INSNS (1), /* cost of an add instruction */
1371 COSTS_N_INSNS (2), /* cost of a lea instruction */
1372 COSTS_N_INSNS (1), /* variable shift costs */
1373 COSTS_N_INSNS (1), /* constant shift costs */
1374 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1375 COSTS_N_INSNS (4), /* HI */
1376 COSTS_N_INSNS (3), /* SI */
1377 COSTS_N_INSNS (4), /* DI */
1378 COSTS_N_INSNS (5)}, /* other */
1379 0, /* cost of multiply per each bit set */
1380 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1381 COSTS_N_INSNS (35), /* HI */
1382 COSTS_N_INSNS (51), /* SI */
1383 COSTS_N_INSNS (83), /* DI */
1384 COSTS_N_INSNS (83)}, /* other */
1385 COSTS_N_INSNS (1), /* cost of movsx */
1386 COSTS_N_INSNS (1), /* cost of movzx */
1387 8, /* "large" insn */
1388 9, /* MOVE_RATIO */
1389 4, /* cost for loading QImode using movzbl */
1390 {3, 4, 3}, /* cost of loading integer registers
1391 in QImode, HImode and SImode.
1392 Relative to reg-reg move (2). */
1393 {3, 4, 3}, /* cost of storing integer registers */
1394 4, /* cost of reg,reg fld/fst */
1395 {4, 4, 12}, /* cost of loading fp registers
1396 in SFmode, DFmode and XFmode */
1397 {6, 6, 8}, /* cost of storing fp registers
1398 in SFmode, DFmode and XFmode */
1399 2, /* cost of moving MMX register */
1400 {3, 3}, /* cost of loading MMX registers
1401 in SImode and DImode */
1402 {4, 4}, /* cost of storing MMX registers
1403 in SImode and DImode */
1404 2, /* cost of moving SSE register */
1405 {4, 4, 3}, /* cost of loading SSE registers
1406 in SImode, DImode and TImode */
1407 {4, 4, 5}, /* cost of storing SSE registers
1408 in SImode, DImode and TImode */
1409 3, /* MMX or SSE register to integer */
1410 /* On K8:
1411 MOVD reg64, xmmreg Double FSTORE 4
1412 MOVD reg32, xmmreg Double FSTORE 4
1413 On AMDFAM10:
1414 MOVD reg64, xmmreg Double FADD 3
1415 1/1 1/1
1416 MOVD reg32, xmmreg Double FADD 3
1417 1/1 1/1 */
1418 32, /* size of l1 cache. */
1419 2048, /* size of l2 cache. */
1420 64, /* size of prefetch block */
1421 100, /* number of parallel prefetches */
1422 2, /* Branch cost */
1423 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1424 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1425 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1426 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1427 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1428 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1429 btver2_memcpy,
1430 btver2_memset,
1431 4, /* scalar_stmt_cost. */
1432 2, /* scalar_load_cost. */
1433 2, /* scalar_store_cost. */
1434 6, /* vec_stmt_cost. */
1435 0, /* vec_to_scalar_cost. */
1436 2, /* scalar_to_vec_cost. */
1437 2, /* vec_align_load_cost. */
1438 2, /* vec_unalign_load_cost. */
1439 2, /* vec_store_cost. */
1440 2, /* cond_taken_branch_cost. */
1441 1, /* cond_not_taken_branch_cost. */
1444 static stringop_algs pentium4_memcpy[2] = {
1445 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1446 DUMMY_STRINGOP_ALGS};
1447 static stringop_algs pentium4_memset[2] = {
1448 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1449 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1450 DUMMY_STRINGOP_ALGS};
1452 static const
1453 struct processor_costs pentium4_cost = {
1454 COSTS_N_INSNS (1), /* cost of an add instruction */
1455 COSTS_N_INSNS (3), /* cost of a lea instruction */
1456 COSTS_N_INSNS (4), /* variable shift costs */
1457 COSTS_N_INSNS (4), /* constant shift costs */
1458 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1459 COSTS_N_INSNS (15), /* HI */
1460 COSTS_N_INSNS (15), /* SI */
1461 COSTS_N_INSNS (15), /* DI */
1462 COSTS_N_INSNS (15)}, /* other */
1463 0, /* cost of multiply per each bit set */
1464 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1465 COSTS_N_INSNS (56), /* HI */
1466 COSTS_N_INSNS (56), /* SI */
1467 COSTS_N_INSNS (56), /* DI */
1468 COSTS_N_INSNS (56)}, /* other */
1469 COSTS_N_INSNS (1), /* cost of movsx */
1470 COSTS_N_INSNS (1), /* cost of movzx */
1471 16, /* "large" insn */
1472 6, /* MOVE_RATIO */
1473 2, /* cost for loading QImode using movzbl */
1474 {4, 5, 4}, /* cost of loading integer registers
1475 in QImode, HImode and SImode.
1476 Relative to reg-reg move (2). */
1477 {2, 3, 2}, /* cost of storing integer registers */
1478 2, /* cost of reg,reg fld/fst */
1479 {2, 2, 6}, /* cost of loading fp registers
1480 in SFmode, DFmode and XFmode */
1481 {4, 4, 6}, /* cost of storing fp registers
1482 in SFmode, DFmode and XFmode */
1483 2, /* cost of moving MMX register */
1484 {2, 2}, /* cost of loading MMX registers
1485 in SImode and DImode */
1486 {2, 2}, /* cost of storing MMX registers
1487 in SImode and DImode */
1488 12, /* cost of moving SSE register */
1489 {12, 12, 12}, /* cost of loading SSE registers
1490 in SImode, DImode and TImode */
1491 {2, 2, 8}, /* cost of storing SSE registers
1492 in SImode, DImode and TImode */
1493 10, /* MMX or SSE register to integer */
1494 8, /* size of l1 cache. */
1495 256, /* size of l2 cache. */
1496 64, /* size of prefetch block */
1497 6, /* number of parallel prefetches */
1498 2, /* Branch cost */
1499 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1500 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1501 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1502 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1503 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1504 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1505 pentium4_memcpy,
1506 pentium4_memset,
1507 1, /* scalar_stmt_cost. */
1508 1, /* scalar_load_cost. */
1509 1, /* scalar_store_cost. */
1510 1, /* vec_stmt_cost. */
1511 1, /* vec_to_scalar_cost. */
1512 1, /* scalar_to_vec_cost. */
1513 1, /* vec_align_load_cost. */
1514 2, /* vec_unalign_load_cost. */
1515 1, /* vec_store_cost. */
1516 3, /* cond_taken_branch_cost. */
1517 1, /* cond_not_taken_branch_cost. */
1520 static stringop_algs nocona_memcpy[2] = {
1521 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1522 {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
1523 {100000, unrolled_loop, false}, {-1, libcall, false}}}};
1525 static stringop_algs nocona_memset[2] = {
1526 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1527 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1528 {libcall, {{24, loop, false}, {64, unrolled_loop, false},
1529 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1531 static const
1532 struct processor_costs nocona_cost = {
1533 COSTS_N_INSNS (1), /* cost of an add instruction */
1534 COSTS_N_INSNS (1), /* cost of a lea instruction */
1535 COSTS_N_INSNS (1), /* variable shift costs */
1536 COSTS_N_INSNS (1), /* constant shift costs */
1537 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1538 COSTS_N_INSNS (10), /* HI */
1539 COSTS_N_INSNS (10), /* SI */
1540 COSTS_N_INSNS (10), /* DI */
1541 COSTS_N_INSNS (10)}, /* other */
1542 0, /* cost of multiply per each bit set */
1543 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1544 COSTS_N_INSNS (66), /* HI */
1545 COSTS_N_INSNS (66), /* SI */
1546 COSTS_N_INSNS (66), /* DI */
1547 COSTS_N_INSNS (66)}, /* other */
1548 COSTS_N_INSNS (1), /* cost of movsx */
1549 COSTS_N_INSNS (1), /* cost of movzx */
1550 16, /* "large" insn */
1551 17, /* MOVE_RATIO */
1552 4, /* cost for loading QImode using movzbl */
1553 {4, 4, 4}, /* cost of loading integer registers
1554 in QImode, HImode and SImode.
1555 Relative to reg-reg move (2). */
1556 {4, 4, 4}, /* cost of storing integer registers */
1557 3, /* cost of reg,reg fld/fst */
1558 {12, 12, 12}, /* cost of loading fp registers
1559 in SFmode, DFmode and XFmode */
1560 {4, 4, 4}, /* cost of storing fp registers
1561 in SFmode, DFmode and XFmode */
1562 6, /* cost of moving MMX register */
1563 {12, 12}, /* cost of loading MMX registers
1564 in SImode and DImode */
1565 {12, 12}, /* cost of storing MMX registers
1566 in SImode and DImode */
1567 6, /* cost of moving SSE register */
1568 {12, 12, 12}, /* cost of loading SSE registers
1569 in SImode, DImode and TImode */
1570 {12, 12, 12}, /* cost of storing SSE registers
1571 in SImode, DImode and TImode */
1572 8, /* MMX or SSE register to integer */
1573 8, /* size of l1 cache. */
1574 1024, /* size of l2 cache. */
1575 64, /* size of prefetch block */
1576 8, /* number of parallel prefetches */
1577 1, /* Branch cost */
1578 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1579 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1580 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1581 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1582 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1583 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1584 nocona_memcpy,
1585 nocona_memset,
1586 1, /* scalar_stmt_cost. */
1587 1, /* scalar_load_cost. */
1588 1, /* scalar_store_cost. */
1589 1, /* vec_stmt_cost. */
1590 1, /* vec_to_scalar_cost. */
1591 1, /* scalar_to_vec_cost. */
1592 1, /* vec_align_load_cost. */
1593 2, /* vec_unalign_load_cost. */
1594 1, /* vec_store_cost. */
1595 3, /* cond_taken_branch_cost. */
1596 1, /* cond_not_taken_branch_cost. */
1599 static stringop_algs atom_memcpy[2] = {
1600 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1601 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1602 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1603 static stringop_algs atom_memset[2] = {
1604 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1605 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1606 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1607 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1608 static const
1609 struct processor_costs atom_cost = {
1610 COSTS_N_INSNS (1), /* cost of an add instruction */
1611 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1612 COSTS_N_INSNS (1), /* variable shift costs */
1613 COSTS_N_INSNS (1), /* constant shift costs */
1614 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1615 COSTS_N_INSNS (4), /* HI */
1616 COSTS_N_INSNS (3), /* SI */
1617 COSTS_N_INSNS (4), /* DI */
1618 COSTS_N_INSNS (2)}, /* other */
1619 0, /* cost of multiply per each bit set */
1620 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1621 COSTS_N_INSNS (26), /* HI */
1622 COSTS_N_INSNS (42), /* SI */
1623 COSTS_N_INSNS (74), /* DI */
1624 COSTS_N_INSNS (74)}, /* other */
1625 COSTS_N_INSNS (1), /* cost of movsx */
1626 COSTS_N_INSNS (1), /* cost of movzx */
1627 8, /* "large" insn */
1628 17, /* MOVE_RATIO */
1629 4, /* cost for loading QImode using movzbl */
1630 {4, 4, 4}, /* cost of loading integer registers
1631 in QImode, HImode and SImode.
1632 Relative to reg-reg move (2). */
1633 {4, 4, 4}, /* cost of storing integer registers */
1634 4, /* cost of reg,reg fld/fst */
1635 {12, 12, 12}, /* cost of loading fp registers
1636 in SFmode, DFmode and XFmode */
1637 {6, 6, 8}, /* cost of storing fp registers
1638 in SFmode, DFmode and XFmode */
1639 2, /* cost of moving MMX register */
1640 {8, 8}, /* cost of loading MMX registers
1641 in SImode and DImode */
1642 {8, 8}, /* cost of storing MMX registers
1643 in SImode and DImode */
1644 2, /* cost of moving SSE register */
1645 {8, 8, 8}, /* cost of loading SSE registers
1646 in SImode, DImode and TImode */
1647 {8, 8, 8}, /* cost of storing SSE registers
1648 in SImode, DImode and TImode */
1649 5, /* MMX or SSE register to integer */
1650 32, /* size of l1 cache. */
1651 256, /* size of l2 cache. */
1652 64, /* size of prefetch block */
1653 6, /* number of parallel prefetches */
1654 3, /* Branch cost */
1655 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1656 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1657 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1658 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1659 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1660 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1661 atom_memcpy,
1662 atom_memset,
1663 1, /* scalar_stmt_cost. */
1664 1, /* scalar_load_cost. */
1665 1, /* scalar_store_cost. */
1666 1, /* vec_stmt_cost. */
1667 1, /* vec_to_scalar_cost. */
1668 1, /* scalar_to_vec_cost. */
1669 1, /* vec_align_load_cost. */
1670 2, /* vec_unalign_load_cost. */
1671 1, /* vec_store_cost. */
1672 3, /* cond_taken_branch_cost. */
1673 1, /* cond_not_taken_branch_cost. */
1676 static stringop_algs slm_memcpy[2] = {
1677 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1678 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1679 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1680 static stringop_algs slm_memset[2] = {
1681 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1682 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1683 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1684 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1685 static const
1686 struct processor_costs slm_cost = {
1687 COSTS_N_INSNS (1), /* cost of an add instruction */
1688 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1689 COSTS_N_INSNS (1), /* variable shift costs */
1690 COSTS_N_INSNS (1), /* constant shift costs */
1691 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1692 COSTS_N_INSNS (3), /* HI */
1693 COSTS_N_INSNS (3), /* SI */
1694 COSTS_N_INSNS (4), /* DI */
1695 COSTS_N_INSNS (2)}, /* other */
1696 0, /* cost of multiply per each bit set */
1697 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1698 COSTS_N_INSNS (26), /* HI */
1699 COSTS_N_INSNS (42), /* SI */
1700 COSTS_N_INSNS (74), /* DI */
1701 COSTS_N_INSNS (74)}, /* other */
1702 COSTS_N_INSNS (1), /* cost of movsx */
1703 COSTS_N_INSNS (1), /* cost of movzx */
1704 8, /* "large" insn */
1705 17, /* MOVE_RATIO */
1706 4, /* cost for loading QImode using movzbl */
1707 {4, 4, 4}, /* cost of loading integer registers
1708 in QImode, HImode and SImode.
1709 Relative to reg-reg move (2). */
1710 {4, 4, 4}, /* cost of storing integer registers */
1711 4, /* cost of reg,reg fld/fst */
1712 {12, 12, 12}, /* cost of loading fp registers
1713 in SFmode, DFmode and XFmode */
1714 {6, 6, 8}, /* cost of storing fp registers
1715 in SFmode, DFmode and XFmode */
1716 2, /* cost of moving MMX register */
1717 {8, 8}, /* cost of loading MMX registers
1718 in SImode and DImode */
1719 {8, 8}, /* cost of storing MMX registers
1720 in SImode and DImode */
1721 2, /* cost of moving SSE register */
1722 {8, 8, 8}, /* cost of loading SSE registers
1723 in SImode, DImode and TImode */
1724 {8, 8, 8}, /* cost of storing SSE registers
1725 in SImode, DImode and TImode */
1726 5, /* MMX or SSE register to integer */
1727 32, /* size of l1 cache. */
1728 256, /* size of l2 cache. */
1729 64, /* size of prefetch block */
1730 6, /* number of parallel prefetches */
1731 3, /* Branch cost */
1732 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1733 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1734 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1735 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1736 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1737 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1738 slm_memcpy,
1739 slm_memset,
1740 1, /* scalar_stmt_cost. */
1741 1, /* scalar_load_cost. */
1742 1, /* scalar_store_cost. */
1743 1, /* vec_stmt_cost. */
1744 4, /* vec_to_scalar_cost. */
1745 1, /* scalar_to_vec_cost. */
1746 1, /* vec_align_load_cost. */
1747 2, /* vec_unalign_load_cost. */
1748 1, /* vec_store_cost. */
1749 3, /* cond_taken_branch_cost. */
1750 1, /* cond_not_taken_branch_cost. */
1753 static stringop_algs intel_memcpy[2] = {
1754 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1755 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1756 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1757 static stringop_algs intel_memset[2] = {
1758 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1759 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1760 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1761 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1762 static const
1763 struct processor_costs intel_cost = {
1764 COSTS_N_INSNS (1), /* cost of an add instruction */
1765 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1766 COSTS_N_INSNS (1), /* variable shift costs */
1767 COSTS_N_INSNS (1), /* constant shift costs */
1768 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1769 COSTS_N_INSNS (3), /* HI */
1770 COSTS_N_INSNS (3), /* SI */
1771 COSTS_N_INSNS (4), /* DI */
1772 COSTS_N_INSNS (2)}, /* other */
1773 0, /* cost of multiply per each bit set */
1774 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1775 COSTS_N_INSNS (26), /* HI */
1776 COSTS_N_INSNS (42), /* SI */
1777 COSTS_N_INSNS (74), /* DI */
1778 COSTS_N_INSNS (74)}, /* other */
1779 COSTS_N_INSNS (1), /* cost of movsx */
1780 COSTS_N_INSNS (1), /* cost of movzx */
1781 8, /* "large" insn */
1782 17, /* MOVE_RATIO */
1783 4, /* cost for loading QImode using movzbl */
1784 {4, 4, 4}, /* cost of loading integer registers
1785 in QImode, HImode and SImode.
1786 Relative to reg-reg move (2). */
1787 {4, 4, 4}, /* cost of storing integer registers */
1788 4, /* cost of reg,reg fld/fst */
1789 {12, 12, 12}, /* cost of loading fp registers
1790 in SFmode, DFmode and XFmode */
1791 {6, 6, 8}, /* cost of storing fp registers
1792 in SFmode, DFmode and XFmode */
1793 2, /* cost of moving MMX register */
1794 {8, 8}, /* cost of loading MMX registers
1795 in SImode and DImode */
1796 {8, 8}, /* cost of storing MMX registers
1797 in SImode and DImode */
1798 2, /* cost of moving SSE register */
1799 {8, 8, 8}, /* cost of loading SSE registers
1800 in SImode, DImode and TImode */
1801 {8, 8, 8}, /* cost of storing SSE registers
1802 in SImode, DImode and TImode */
1803 5, /* MMX or SSE register to integer */
1804 32, /* size of l1 cache. */
1805 256, /* size of l2 cache. */
1806 64, /* size of prefetch block */
1807 6, /* number of parallel prefetches */
1808 3, /* Branch cost */
1809 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1810 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1811 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1812 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1813 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1814 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1815 intel_memcpy,
1816 intel_memset,
1817 1, /* scalar_stmt_cost. */
1818 1, /* scalar_load_cost. */
1819 1, /* scalar_store_cost. */
1820 1, /* vec_stmt_cost. */
1821 4, /* vec_to_scalar_cost. */
1822 1, /* scalar_to_vec_cost. */
1823 1, /* vec_align_load_cost. */
1824 2, /* vec_unalign_load_cost. */
1825 1, /* vec_store_cost. */
1826 3, /* cond_taken_branch_cost. */
1827 1, /* cond_not_taken_branch_cost. */
1830 /* Generic should produce code tuned for Core-i7 (and newer chips)
1831 and btver1 (and newer chips). */
1833 static stringop_algs generic_memcpy[2] = {
1834 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1835 {-1, libcall, false}}},
1836 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1837 {-1, libcall, false}}}};
1838 static stringop_algs generic_memset[2] = {
1839 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1840 {-1, libcall, false}}},
1841 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1842 {-1, libcall, false}}}};
1843 static const
1844 struct processor_costs generic_cost = {
1845 COSTS_N_INSNS (1), /* cost of an add instruction */
1846 /* On all chips taken into consideration, lea is 2 cycles or more. With
1847 this cost, however, our current implementation of synth_mult results in
1848 the use of unnecessary temporary registers, causing regressions on several
1849 SPECfp benchmarks. */
1850 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1851 COSTS_N_INSNS (1), /* variable shift costs */
1852 COSTS_N_INSNS (1), /* constant shift costs */
1853 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1854 COSTS_N_INSNS (4), /* HI */
1855 COSTS_N_INSNS (3), /* SI */
1856 COSTS_N_INSNS (4), /* DI */
1857 COSTS_N_INSNS (2)}, /* other */
1858 0, /* cost of multiply per each bit set */
1859 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1860 COSTS_N_INSNS (26), /* HI */
1861 COSTS_N_INSNS (42), /* SI */
1862 COSTS_N_INSNS (74), /* DI */
1863 COSTS_N_INSNS (74)}, /* other */
1864 COSTS_N_INSNS (1), /* cost of movsx */
1865 COSTS_N_INSNS (1), /* cost of movzx */
1866 8, /* "large" insn */
1867 17, /* MOVE_RATIO */
1868 4, /* cost for loading QImode using movzbl */
1869 {4, 4, 4}, /* cost of loading integer registers
1870 in QImode, HImode and SImode.
1871 Relative to reg-reg move (2). */
1872 {4, 4, 4}, /* cost of storing integer registers */
1873 4, /* cost of reg,reg fld/fst */
1874 {12, 12, 12}, /* cost of loading fp registers
1875 in SFmode, DFmode and XFmode */
1876 {6, 6, 8}, /* cost of storing fp registers
1877 in SFmode, DFmode and XFmode */
1878 2, /* cost of moving MMX register */
1879 {8, 8}, /* cost of loading MMX registers
1880 in SImode and DImode */
1881 {8, 8}, /* cost of storing MMX registers
1882 in SImode and DImode */
1883 2, /* cost of moving SSE register */
1884 {8, 8, 8}, /* cost of loading SSE registers
1885 in SImode, DImode and TImode */
1886 {8, 8, 8}, /* cost of storing SSE registers
1887 in SImode, DImode and TImode */
1888 5, /* MMX or SSE register to integer */
1889 32, /* size of l1 cache. */
1890 512, /* size of l2 cache. */
1891 64, /* size of prefetch block */
1892 6, /* number of parallel prefetches */
1893 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1894 value is increased to the perhaps more appropriate value of 5. */
1895 3, /* Branch cost */
1896 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1897 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1898 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1899 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1900 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1901 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1902 generic_memcpy,
1903 generic_memset,
1904 1, /* scalar_stmt_cost. */
1905 1, /* scalar_load_cost. */
1906 1, /* scalar_store_cost. */
1907 1, /* vec_stmt_cost. */
1908 1, /* vec_to_scalar_cost. */
1909 1, /* scalar_to_vec_cost. */
1910 1, /* vec_align_load_cost. */
1911 2, /* vec_unalign_load_cost. */
1912 1, /* vec_store_cost. */
1913 3, /* cond_taken_branch_cost. */
1914 1, /* cond_not_taken_branch_cost. */
1917 /* core_cost should produce code tuned for the Core family of CPUs. */
1918 static stringop_algs core_memcpy[2] = {
1919 {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
1920 {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
1921 {-1, libcall, false}}}};
1922 static stringop_algs core_memset[2] = {
1923 {libcall, {{6, loop_1_byte, true},
1924 {24, loop, true},
1925 {8192, rep_prefix_4_byte, true},
1926 {-1, libcall, false}}},
1927 {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
1928 {-1, libcall, false}}}};
1930 static const
1931 struct processor_costs core_cost = {
1932 COSTS_N_INSNS (1), /* cost of an add instruction */
1933 /* On all chips taken into consideration, lea is 2 cycles or more. With
1934 this cost, however, our current implementation of synth_mult results in
1935 the use of unnecessary temporary registers, causing regressions on several
1936 SPECfp benchmarks. */
1937 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1938 COSTS_N_INSNS (1), /* variable shift costs */
1939 COSTS_N_INSNS (1), /* constant shift costs */
1940 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1941 COSTS_N_INSNS (4), /* HI */
1942 COSTS_N_INSNS (3), /* SI */
1943 COSTS_N_INSNS (4), /* DI */
1944 COSTS_N_INSNS (2)}, /* other */
1945 0, /* cost of multiply per each bit set */
1946 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1947 COSTS_N_INSNS (26), /* HI */
1948 COSTS_N_INSNS (42), /* SI */
1949 COSTS_N_INSNS (74), /* DI */
1950 COSTS_N_INSNS (74)}, /* other */
1951 COSTS_N_INSNS (1), /* cost of movsx */
1952 COSTS_N_INSNS (1), /* cost of movzx */
1953 8, /* "large" insn */
1954 17, /* MOVE_RATIO */
1955 4, /* cost for loading QImode using movzbl */
1956 {4, 4, 4}, /* cost of loading integer registers
1957 in QImode, HImode and SImode.
1958 Relative to reg-reg move (2). */
1959 {4, 4, 4}, /* cost of storing integer registers */
1960 4, /* cost of reg,reg fld/fst */
1961 {12, 12, 12}, /* cost of loading fp registers
1962 in SFmode, DFmode and XFmode */
1963 {6, 6, 8}, /* cost of storing fp registers
1964 in SFmode, DFmode and XFmode */
1965 2, /* cost of moving MMX register */
1966 {8, 8}, /* cost of loading MMX registers
1967 in SImode and DImode */
1968 {8, 8}, /* cost of storing MMX registers
1969 in SImode and DImode */
1970 2, /* cost of moving SSE register */
1971 {8, 8, 8}, /* cost of loading SSE registers
1972 in SImode, DImode and TImode */
1973 {8, 8, 8}, /* cost of storing SSE registers
1974 in SImode, DImode and TImode */
1975 5, /* MMX or SSE register to integer */
1976 64, /* size of l1 cache. */
1977 512, /* size of l2 cache. */
1978 64, /* size of prefetch block */
1979 6, /* number of parallel prefetches */
1980 /* FIXME: perhaps a more appropriate value is 5. */
1981 3, /* Branch cost */
1982 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1983 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1984 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1985 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1986 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1987 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1988 core_memcpy,
1989 core_memset,
1990 1, /* scalar_stmt_cost. */
1991 1, /* scalar_load_cost. */
1992 1, /* scalar_store_cost. */
1993 1, /* vec_stmt_cost. */
1994 1, /* vec_to_scalar_cost. */
1995 1, /* scalar_to_vec_cost. */
1996 1, /* vec_align_load_cost. */
1997 2, /* vec_unalign_load_cost. */
1998 1, /* vec_store_cost. */
1999 3, /* cond_taken_branch_cost. */
2000 1, /* cond_not_taken_branch_cost. */
2004 /* Set by -mtune. */
2005 const struct processor_costs *ix86_tune_cost = &pentium_cost;
2007 /* Set by -mtune or -Os. */
2008 const struct processor_costs *ix86_cost = &pentium_cost;
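/* Illustrative sketch (an assumption added for clarity, not a quote of the
   option-override code): these pointers are retargeted during option
   processing, roughly along the lines of

     ix86_tune_cost = processor_target_table[ix86_tune].cost;
     ix86_cost = optimize_size ? &ix86_size_cost : ix86_tune_cost;

   after which the rtx-cost hooks consult fields of the selected table, for
   example ix86_cost->mult_init[MODE_INDEX (mode)] and ix86_cost->mult_bit
   when costing a multiply.  */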
2010 /* Processor feature/optimization bitmasks. */
2011 #define m_386 (1<<PROCESSOR_I386)
2012 #define m_486 (1<<PROCESSOR_I486)
2013 #define m_PENT (1<<PROCESSOR_PENTIUM)
2014 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
2015 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
2016 #define m_NOCONA (1<<PROCESSOR_NOCONA)
2017 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
2018 #define m_CORE2 (1<<PROCESSOR_CORE2)
2019 #define m_NEHALEM (1<<PROCESSOR_NEHALEM)
2020 #define m_SANDYBRIDGE (1<<PROCESSOR_SANDYBRIDGE)
2021 #define m_HASWELL (1<<PROCESSOR_HASWELL)
2022 #define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
2023 #define m_BONNELL (1<<PROCESSOR_BONNELL)
2024 #define m_SILVERMONT (1<<PROCESSOR_SILVERMONT)
2025 #define m_INTEL (1<<PROCESSOR_INTEL)
2027 #define m_GEODE (1<<PROCESSOR_GEODE)
2028 #define m_K6 (1<<PROCESSOR_K6)
2029 #define m_K6_GEODE (m_K6 | m_GEODE)
2030 #define m_K8 (1<<PROCESSOR_K8)
2031 #define m_ATHLON (1<<PROCESSOR_ATHLON)
2032 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
2033 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
2034 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
2035 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
2036 #define m_BDVER3 (1<<PROCESSOR_BDVER3)
2037 #define m_BDVER4 (1<<PROCESSOR_BDVER4)
2038 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
2039 #define m_BTVER2 (1<<PROCESSOR_BTVER2)
2040 #define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
2041 #define m_BTVER (m_BTVER1 | m_BTVER2)
2042 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)
2044 #define m_GENERIC (1<<PROCESSOR_GENERIC)
2046 const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
2047 #undef DEF_TUNE
2048 #define DEF_TUNE(tune, name, selector) name,
2049 #include "x86-tune.def"
2050 #undef DEF_TUNE
2053 /* Feature tests against the various tunings. */
2054 unsigned char ix86_tune_features[X86_TUNE_LAST];
2056 /* Feature tests against the various tunings used to create ix86_tune_features
2057 based on the processor mask. */
2058 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
2059 #undef DEF_TUNE
2060 #define DEF_TUNE(tune, name, selector) selector,
2061 #include "x86-tune.def"
2062 #undef DEF_TUNE
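/* Note added for illustration: each DEF_TUNE entry in x86-tune.def pairs a
   feature name with the processor mask that enables it, e.g. something like

     DEF_TUNE (X86_TUNE_USE_LEAVE, "use_leave",
               m_386 | m_CORE_ALL | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC)

   so initial_ix86_tune_features[X86_TUNE_USE_LEAVE] holds that mask, and the
   feature is on whenever the bit for the -mtune processor is set in it.  The
   particular entry shown here is an assumed example, not a quote.  */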
2065 /* Feature tests against the various architecture variations. */
2066 unsigned char ix86_arch_features[X86_ARCH_LAST];
2068 /* Feature tests against the various architecture variations, used to create
2069 ix86_arch_features based on the processor mask. */
2070 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2071 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
2072 ~(m_386 | m_486 | m_PENT | m_K6),
2074 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2075 ~m_386,
2077 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2078 ~(m_386 | m_486),
2080 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2081 ~m_386,
2083 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2084 ~m_386,
2087 /* In case the average insn count for a single function invocation is
2088 lower than this constant, emit fast (but longer) prologue and
2089 epilogue code. */
2090 #define FAST_PROLOGUE_INSN_COUNT 20
2092 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
2093 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2094 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2095 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2097 /* Array of the smallest class containing reg number REGNO, indexed by
2098 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2100 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2102 /* ax, dx, cx, bx */
2103 AREG, DREG, CREG, BREG,
2104 /* si, di, bp, sp */
2105 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2106 /* FP registers */
2107 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2108 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2109 /* arg pointer */
2110 NON_Q_REGS,
2111 /* flags, fpsr, fpcr, frame */
2112 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2113 /* SSE registers */
2114 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2115 SSE_REGS, SSE_REGS,
2116 /* MMX registers */
2117 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2118 MMX_REGS, MMX_REGS,
2119 /* REX registers */
2120 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2121 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2122 /* SSE REX registers */
2123 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2124 SSE_REGS, SSE_REGS,
2125 /* AVX-512 SSE registers */
2126 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2127 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2128 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2129 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2130 /* Mask registers. */
2131 MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2132 MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2135 /* The "default" register map used in 32bit mode. */
2137 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2139 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2140 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2141 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2142 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2143 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2144 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2145 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2146 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2147 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2148 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2151 /* The "default" register map used in 64bit mode. */
2153 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2155 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2156 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2157 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2158 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2159 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2160 8,9,10,11,12,13,14,15, /* extended integer registers */
2161 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2162 67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */
2163 75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */
2164 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
2167 /* Define the register numbers to be used in Dwarf debugging information.
2168 The SVR4 reference port C compiler uses the following register numbers
2169 in its Dwarf output code:
2170 0 for %eax (gcc regno = 0)
2171 1 for %ecx (gcc regno = 2)
2172 2 for %edx (gcc regno = 1)
2173 3 for %ebx (gcc regno = 3)
2174 4 for %esp (gcc regno = 7)
2175 5 for %ebp (gcc regno = 6)
2176 6 for %esi (gcc regno = 4)
2177 7 for %edi (gcc regno = 5)
2178 The following three DWARF register numbers are never generated by
2179 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2180 believes these numbers have these meanings.
2181 8 for %eip (no gcc equivalent)
2182 9 for %eflags (gcc regno = 17)
2183 10 for %trapno (no gcc equivalent)
2184 It is not at all clear how we should number the FP stack registers
2185 for the x86 architecture. If the version of SDB on x86/svr4 were
2186 a bit less brain dead with respect to floating-point then we would
2187 have a precedent to follow with respect to DWARF register numbers
2188 for x86 FP registers, but the SDB on x86/svr4 is so completely
2189 broken with respect to FP registers that it is hardly worth thinking
2190 of it as something to strive for compatibility with.
2191 The version of x86/svr4 SDB I have at the moment does (partially)
2192 seem to believe that DWARF register number 11 is associated with
2193 the x86 register %st(0), but that's about all. Higher DWARF
2194 register numbers don't seem to be associated with anything in
2195 particular, and even for DWARF regno 11, SDB only seems to under-
2196 stand that it should say that a variable lives in %st(0) (when
2197 asked via an `=' command) if we said it was in DWARF regno 11,
2198 but SDB still prints garbage when asked for the value of the
2199 variable in question (via a `/' command).
2200 (Also note that the labels SDB prints for various FP stack regs
2201 when doing an `x' command are all wrong.)
2202 Note that these problems generally don't affect the native SVR4
2203 C compiler because it doesn't allow the use of -O with -g and
2204 because when it is *not* optimizing, it allocates a memory
2205 location for each floating-point variable, and the memory
2206 location is what gets described in the DWARF AT_location
2207 attribute for the variable in question.
2208 Regardless of the severe mental illness of the x86/svr4 SDB, we
2209 do something sensible here and we use the following DWARF
2210 register numbers. Note that these are all stack-top-relative
2211 numbers.
2212 11 for %st(0) (gcc regno = 8)
2213 12 for %st(1) (gcc regno = 9)
2214 13 for %st(2) (gcc regno = 10)
2215 14 for %st(3) (gcc regno = 11)
2216 15 for %st(4) (gcc regno = 12)
2217 16 for %st(5) (gcc regno = 13)
2218 17 for %st(6) (gcc regno = 14)
2219 18 for %st(7) (gcc regno = 15)
2221 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2223 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2224 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2225 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2226 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2227 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2228 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2229 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2230 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2231 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2232 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2235 /* Define parameter passing and return registers. */
2237 static int const x86_64_int_parameter_registers[6] =
2239 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2242 static int const x86_64_ms_abi_int_parameter_registers[4] =
2244 CX_REG, DX_REG, R8_REG, R9_REG
2247 static int const x86_64_int_return_registers[4] =
2249 AX_REG, DX_REG, DI_REG, SI_REG
2252 /* Additional registers that are clobbered by SYSV calls. */
2254 int const x86_64_ms_sysv_extra_clobbered_registers[12] =
2256 SI_REG, DI_REG,
2257 XMM6_REG, XMM7_REG,
2258 XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
2259 XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
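/* Note added for clarity: the registers listed above are callee-saved
   (nonvolatile) under the Microsoft x64 calling convention but
   call-clobbered under the SysV AMD64 ABI, so an MS-ABI caller has to
   treat them as destroyed across a call to a SYSV_ABI function.  */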
2262 /* Define the structure for the machine field in struct function. */
2264 struct GTY(()) stack_local_entry {
2265 unsigned short mode;
2266 unsigned short n;
2267 rtx rtl;
2268 struct stack_local_entry *next;
2271 /* Structure describing stack frame layout.
2272 Stack grows downward:
2274 [arguments]
2275 <- ARG_POINTER
2276 saved pc
2278 saved static chain if ix86_static_chain_on_stack
2280 saved frame pointer if frame_pointer_needed
2281 <- HARD_FRAME_POINTER
2282 [saved regs]
2283 <- regs_save_offset
2284 [padding0]
2286 [saved SSE regs]
2287 <- sse_regs_save_offset
2288 [padding1] |
2289 | <- FRAME_POINTER
2290 [va_arg registers] |
2292 [frame] |
2294 [padding2] | = to_allocate
2295 <- STACK_POINTER
2297 struct ix86_frame
2299 int nsseregs;
2300 int nregs;
2301 int va_arg_size;
2302 int red_zone_size;
2303 int outgoing_arguments_size;
2305 /* The offsets relative to ARG_POINTER. */
2306 HOST_WIDE_INT frame_pointer_offset;
2307 HOST_WIDE_INT hard_frame_pointer_offset;
2308 HOST_WIDE_INT stack_pointer_offset;
2309 HOST_WIDE_INT hfp_save_offset;
2310 HOST_WIDE_INT reg_save_offset;
2311 HOST_WIDE_INT sse_reg_save_offset;
2313 /* When save_regs_using_mov is set, emit prologue using
2314 move instead of push instructions. */
2315 bool save_regs_using_mov;
2318 /* Which cpu are we scheduling for. */
2319 enum attr_cpu ix86_schedule;
2321 /* Which cpu are we optimizing for. */
2322 enum processor_type ix86_tune;
2324 /* Which instruction set architecture to use. */
2325 enum processor_type ix86_arch;
2327 /* True if processor has SSE prefetch instruction. */
2328 unsigned char x86_prefetch_sse;
2330 /* -mstackrealign option */
2331 static const char ix86_force_align_arg_pointer_string[]
2332 = "force_align_arg_pointer";
2334 static rtx (*ix86_gen_leave) (void);
2335 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2336 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2337 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2338 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2339 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2340 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2341 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2342 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2343 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2344 static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
2345 static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
2347 /* Preferred alignment for stack boundary in bits. */
2348 unsigned int ix86_preferred_stack_boundary;
2350 /* Alignment for incoming stack boundary in bits, as specified on
2351 the command line. */
2352 static unsigned int ix86_user_incoming_stack_boundary;
2354 /* Default alignment for incoming stack boundary in bits. */
2355 static unsigned int ix86_default_incoming_stack_boundary;
2357 /* Alignment for incoming stack boundary in bits. */
2358 unsigned int ix86_incoming_stack_boundary;
2360 /* Calling abi specific va_list type nodes. */
2361 static GTY(()) tree sysv_va_list_type_node;
2362 static GTY(()) tree ms_va_list_type_node;
2364 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2365 char internal_label_prefix[16];
2366 int internal_label_prefix_len;
2368 /* Fence to use after loop using movnt. */
2369 tree x86_mfence;
2371 /* Register class used for passing a given 64-bit part of the argument.
2372 These represent classes as documented by the psABI, with the exception
2373 of the SSESF and SSEDF classes, which are basically the SSE class; GCC just
2374 uses an SFmode or DFmode move instead of DImode to avoid reformatting penalties.
2376 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2377 whenever possible (the upper half contains padding). */
2378 enum x86_64_reg_class
2380 X86_64_NO_CLASS,
2381 X86_64_INTEGER_CLASS,
2382 X86_64_INTEGERSI_CLASS,
2383 X86_64_SSE_CLASS,
2384 X86_64_SSESF_CLASS,
2385 X86_64_SSEDF_CLASS,
2386 X86_64_SSEUP_CLASS,
2387 X86_64_X87_CLASS,
2388 X86_64_X87UP_CLASS,
2389 X86_64_COMPLEX_X87_CLASS,
2390 X86_64_MEMORY_CLASS
2393 #define MAX_CLASSES 8
2395 /* Table of constants used by fldpi, fldln2, etc.... */
2396 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2397 static bool ext_80387_constants_init = 0;
2400 static struct machine_function * ix86_init_machine_status (void);
2401 static rtx ix86_function_value (const_tree, const_tree, bool);
2402 static bool ix86_function_value_regno_p (const unsigned int);
2403 static unsigned int ix86_function_arg_boundary (enum machine_mode,
2404 const_tree);
2405 static rtx ix86_static_chain (const_tree, bool);
2406 static int ix86_function_regparm (const_tree, const_tree);
2407 static void ix86_compute_frame_layout (struct ix86_frame *);
2408 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
2409 rtx, rtx, int);
2410 static void ix86_add_new_builtins (HOST_WIDE_INT);
2411 static tree ix86_canonical_va_list_type (tree);
2412 static void predict_jump (int);
2413 static unsigned int split_stack_prologue_scratch_regno (void);
2414 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2416 enum ix86_function_specific_strings
2418 IX86_FUNCTION_SPECIFIC_ARCH,
2419 IX86_FUNCTION_SPECIFIC_TUNE,
2420 IX86_FUNCTION_SPECIFIC_MAX
2423 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2424 const char *, enum fpmath_unit, bool);
2425 static void ix86_function_specific_save (struct cl_target_option *,
2426 struct gcc_options *opts);
2427 static void ix86_function_specific_restore (struct gcc_options *opts,
2428 struct cl_target_option *);
2429 static void ix86_function_specific_print (FILE *, int,
2430 struct cl_target_option *);
2431 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2432 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2433 struct gcc_options *,
2434 struct gcc_options *,
2435 struct gcc_options *);
2436 static bool ix86_can_inline_p (tree, tree);
2437 static void ix86_set_current_function (tree);
2438 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2440 static enum calling_abi ix86_function_abi (const_tree);
2443 #ifndef SUBTARGET32_DEFAULT_CPU
2444 #define SUBTARGET32_DEFAULT_CPU "i386"
2445 #endif
2447 /* Whether -mtune= or -march= were specified */
2448 static int ix86_tune_defaulted;
2449 static int ix86_arch_specified;
2451 /* Vectorization library interface and handlers. */
2452 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2454 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2455 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2457 /* Processor target table, indexed by processor number */
2458 struct ptt
2460 const char *const name; /* processor name */
2461 const struct processor_costs *cost; /* Processor costs */
2462 const int align_loop; /* Default alignments. */
2463 const int align_loop_max_skip;
2464 const int align_jump;
2465 const int align_jump_max_skip;
2466 const int align_func;
2469 /* This table must be in sync with enum processor_type in i386.h. */
2470 static const struct ptt processor_target_table[PROCESSOR_max] =
2472 {"generic", &generic_cost, 16, 10, 16, 10, 16},
2473 {"i386", &i386_cost, 4, 3, 4, 3, 4},
2474 {"i486", &i486_cost, 16, 15, 16, 15, 16},
2475 {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
2476 {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
2477 {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
2478 {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
2479 {"core2", &core_cost, 16, 10, 16, 10, 16},
2480 {"nehalem", &core_cost, 16, 10, 16, 10, 16},
2481 {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
2482 {"haswell", &core_cost, 16, 10, 16, 10, 16},
2483 {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
2484 {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
2485 {"intel", &intel_cost, 16, 15, 16, 7, 16},
2486 {"geode", &geode_cost, 0, 0, 0, 0, 0},
2487 {"k6", &k6_cost, 32, 7, 32, 7, 32},
2488 {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
2489 {"k8", &k8_cost, 16, 7, 16, 7, 16},
2490 {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
2491 {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
2492 {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
2493 {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
2494 {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
2495 {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
2496 {"btver2", &btver2_cost, 16, 10, 16, 7, 11}
2499 static unsigned int
2500 rest_of_handle_insert_vzeroupper (void)
2502 int i;
2504 /* vzeroupper instructions are inserted immediately after reload to
2505 account for possible spills from 256-bit registers. The pass
2506 reuses the mode switching infrastructure by re-running the mode
2507 insertion pass, so disable entities that have already been processed. */
2508 for (i = 0; i < MAX_386_ENTITIES; i++)
2509 ix86_optimize_mode_switching[i] = 0;
2511 ix86_optimize_mode_switching[AVX_U128] = 1;
2513 /* Call optimize_mode_switching. */
2514 g->get_passes ()->execute_pass_mode_switching ();
2515 return 0;
2518 namespace {
2520 const pass_data pass_data_insert_vzeroupper =
2522 RTL_PASS, /* type */
2523 "vzeroupper", /* name */
2524 OPTGROUP_NONE, /* optinfo_flags */
2525 true, /* has_execute */
2526 TV_NONE, /* tv_id */
2527 0, /* properties_required */
2528 0, /* properties_provided */
2529 0, /* properties_destroyed */
2530 0, /* todo_flags_start */
2531 TODO_df_finish, /* todo_flags_finish */
2534 class pass_insert_vzeroupper : public rtl_opt_pass
2536 public:
2537 pass_insert_vzeroupper(gcc::context *ctxt)
2538 : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
2541 /* opt_pass methods: */
2542 virtual bool gate (function *)
2544 return TARGET_AVX && !TARGET_AVX512F && TARGET_VZEROUPPER;
2547 virtual unsigned int execute (function *)
2549 return rest_of_handle_insert_vzeroupper ();
2552 }; // class pass_insert_vzeroupper
2554 } // anon namespace
2556 rtl_opt_pass *
2557 make_pass_insert_vzeroupper (gcc::context *ctxt)
2559 return new pass_insert_vzeroupper (ctxt);
2562 /* Return true if a red-zone is in use. */
2564 static inline bool
2565 ix86_using_red_zone (void)
2567 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2570 /* Return a string that documents the current -m options. The caller is
2571 responsible for freeing the string. */
2573 static char *
2574 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
2575 const char *tune, enum fpmath_unit fpmath,
2576 bool add_nl_p)
2578 struct ix86_target_opts
2580 const char *option; /* option string */
2581 HOST_WIDE_INT mask; /* isa mask options */
2584 /* This table is ordered so that options like -msse4.2 that imply
2585 preceding options will match those first. */
2586 static struct ix86_target_opts isa_opts[] =
2588 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2589 { "-mfma", OPTION_MASK_ISA_FMA },
2590 { "-mxop", OPTION_MASK_ISA_XOP },
2591 { "-mlwp", OPTION_MASK_ISA_LWP },
2592 { "-mavx512f", OPTION_MASK_ISA_AVX512F },
2593 { "-mavx512er", OPTION_MASK_ISA_AVX512ER },
2594 { "-mavx512cd", OPTION_MASK_ISA_AVX512CD },
2595 { "-mavx512pf", OPTION_MASK_ISA_AVX512PF },
2596 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2597 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2598 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2599 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2600 { "-msse3", OPTION_MASK_ISA_SSE3 },
2601 { "-msse2", OPTION_MASK_ISA_SSE2 },
2602 { "-msse", OPTION_MASK_ISA_SSE },
2603 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2604 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2605 { "-mmmx", OPTION_MASK_ISA_MMX },
2606 { "-mabm", OPTION_MASK_ISA_ABM },
2607 { "-mbmi", OPTION_MASK_ISA_BMI },
2608 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2609 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2610 { "-mhle", OPTION_MASK_ISA_HLE },
2611 { "-mfxsr", OPTION_MASK_ISA_FXSR },
2612 { "-mrdseed", OPTION_MASK_ISA_RDSEED },
2613 { "-mprfchw", OPTION_MASK_ISA_PRFCHW },
2614 { "-madx", OPTION_MASK_ISA_ADX },
2615 { "-mtbm", OPTION_MASK_ISA_TBM },
2616 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2617 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2618 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2619 { "-maes", OPTION_MASK_ISA_AES },
2620 { "-msha", OPTION_MASK_ISA_SHA },
2621 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2622 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2623 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2624 { "-mf16c", OPTION_MASK_ISA_F16C },
2625 { "-mrtm", OPTION_MASK_ISA_RTM },
2626 { "-mxsave", OPTION_MASK_ISA_XSAVE },
2627 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT },
2628 { "-mprefetchwt1", OPTION_MASK_ISA_PREFETCHWT1 },
2629 { "-mclflushopt", OPTION_MASK_ISA_CLFLUSHOPT },
2630 { "-mxsavec", OPTION_MASK_ISA_XSAVEC },
2631 { "-mxsaves", OPTION_MASK_ISA_XSAVES },
2634 /* Flag options. */
2635 static struct ix86_target_opts flag_opts[] =
2637 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2638 { "-mlong-double-128", MASK_LONG_DOUBLE_128 },
2639 { "-mlong-double-64", MASK_LONG_DOUBLE_64 },
2640 { "-m80387", MASK_80387 },
2641 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2642 { "-malign-double", MASK_ALIGN_DOUBLE },
2643 { "-mcld", MASK_CLD },
2644 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2645 { "-mieee-fp", MASK_IEEE_FP },
2646 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2647 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2648 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2649 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2650 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2651 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2652 { "-mno-red-zone", MASK_NO_RED_ZONE },
2653 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2654 { "-mrecip", MASK_RECIP },
2655 { "-mrtd", MASK_RTD },
2656 { "-msseregparm", MASK_SSEREGPARM },
2657 { "-mstack-arg-probe", MASK_STACK_PROBE },
2658 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2659 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2660 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2661 { "-mvzeroupper", MASK_VZEROUPPER },
2662 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2663 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2664 { "-mprefer-avx128", MASK_PREFER_AVX128},
2667 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2669 char isa_other[40];
2670 char target_other[40];
2671 unsigned num = 0;
2672 unsigned i, j;
2673 char *ret;
2674 char *ptr;
2675 size_t len;
2676 size_t line_len;
2677 size_t sep_len;
2678 const char *abi;
2680 memset (opts, '\0', sizeof (opts));
2682 /* Add -march= option. */
2683 if (arch)
2685 opts[num][0] = "-march=";
2686 opts[num++][1] = arch;
2689 /* Add -mtune= option. */
2690 if (tune)
2692 opts[num][0] = "-mtune=";
2693 opts[num++][1] = tune;
2696 /* Add -m32/-m64/-mx32. */
2697 if ((isa & OPTION_MASK_ISA_64BIT) != 0)
2699 if ((isa & OPTION_MASK_ABI_64) != 0)
2700 abi = "-m64";
2701 else
2702 abi = "-mx32";
2703 isa &= ~ (OPTION_MASK_ISA_64BIT
2704 | OPTION_MASK_ABI_64
2705 | OPTION_MASK_ABI_X32);
2707 else
2708 abi = "-m32";
2709 opts[num++][0] = abi;
2711 /* Pick out the options in isa options. */
2712 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2714 if ((isa & isa_opts[i].mask) != 0)
2716 opts[num++][0] = isa_opts[i].option;
2717 isa &= ~ isa_opts[i].mask;
2721 if (isa && add_nl_p)
2723 opts[num++][0] = isa_other;
2724 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
2725 isa);
2728 /* Add flag options. */
2729 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2731 if ((flags & flag_opts[i].mask) != 0)
2733 opts[num++][0] = flag_opts[i].option;
2734 flags &= ~ flag_opts[i].mask;
2738 if (flags && add_nl_p)
2740 opts[num++][0] = target_other;
2741 sprintf (target_other, "(other flags: %#x)", flags);
2744 /* Add -fpmath= option. */
2745 if (fpmath)
2747 opts[num][0] = "-mfpmath=";
2748 switch ((int) fpmath)
2750 case FPMATH_387:
2751 opts[num++][1] = "387";
2752 break;
2754 case FPMATH_SSE:
2755 opts[num++][1] = "sse";
2756 break;
2758 case FPMATH_387 | FPMATH_SSE:
2759 opts[num++][1] = "sse+387";
2760 break;
2762 default:
2763 gcc_unreachable ();
2767 /* Any options? */
2768 if (num == 0)
2769 return NULL;
2771 gcc_assert (num < ARRAY_SIZE (opts));
2773 /* Size the string. */
2774 len = 0;
2775 sep_len = (add_nl_p) ? 3 : 1;
2776 for (i = 0; i < num; i++)
2778 len += sep_len;
2779 for (j = 0; j < 2; j++)
2780 if (opts[i][j])
2781 len += strlen (opts[i][j]);
2784 /* Build the string. */
2785 ret = ptr = (char *) xmalloc (len);
2786 line_len = 0;
2788 for (i = 0; i < num; i++)
2790 size_t len2[2];
2792 for (j = 0; j < 2; j++)
2793 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2795 if (i != 0)
2797 *ptr++ = ' ';
2798 line_len++;
2800 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2802 *ptr++ = '\\';
2803 *ptr++ = '\n';
2804 line_len = 0;
2808 for (j = 0; j < 2; j++)
2809 if (opts[i][j])
2811 memcpy (ptr, opts[i][j], len2[j]);
2812 ptr += len2[j];
2813 line_len += len2[j];
2817 *ptr = '\0';
2818 gcc_assert (ret + len >= ptr);
2820 return ret;
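/* Illustrative example (assumed output, not taken from the original source):
   for something like -m64 -march=core2 -mfpmath=sse, the returned string
   would look roughly like

     "-march=core2 -mtune=core2 -m64 -mssse3 -msse3 -msse2 -msse -mmmx
      -mfxsr -mfpmath=sse"

   with a backslash-newline break inserted whenever ADD_NL_P is set and a
   line would exceed 70 columns; the exact set of -m flags printed depends
   on which ISA bits are enabled.  */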
2823 /* Return true if profiling code should be emitted before the
2824 prologue, and false otherwise.
2825 Note: for x86 with "hotfix", a sorry () is issued. */
2826 static bool
2827 ix86_profile_before_prologue (void)
2829 return flag_fentry != 0;
2832 /* Function that is callable from the debugger to print the current
2833 options. */
2834 void ATTRIBUTE_UNUSED
2835 ix86_debug_options (void)
2837 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2838 ix86_arch_string, ix86_tune_string,
2839 ix86_fpmath, true);
2841 if (opts)
2843 fprintf (stderr, "%s\n\n", opts);
2844 free (opts);
2846 else
2847 fputs ("<no options>\n\n", stderr);
2849 return;
2852 static const char *stringop_alg_names[] = {
2853 #define DEF_ENUM
2854 #define DEF_ALG(alg, name) #name,
2855 #include "stringop.def"
2856 #undef DEF_ENUM
2857 #undef DEF_ALG
2860 /* Parse parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
2861 The string is of the following form (or comma separated list of it):
2863 strategy_alg:max_size:[align|noalign]
2865 where the full size range for the strategy is either [0, max_size] or
2866 [min_size, max_size], in which min_size is the max_size + 1 of the
2867 preceding range. The last size range must have max_size == -1.
2869 Examples:
2872 -mmemcpy-strategy=libcall:-1:noalign
2874 this is equivalent to (for known size memcpy) -mstringop-strategy=libcall
2878 -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
2880 This is to tell the compiler to use the following strategy for memset
2881 1) when the expected size is between [1, 16], use rep_8byte strategy;
2882 2) when the size is between [17, 2048], use vector_loop;
2883 3) when the size is > 2048, use libcall. */
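/* Worked example (derived from the comment above; the enumerator names are
   assumptions about how the user-visible names map onto stringop_alg): the
   -mmemset-strategy example would be parsed by
   ix86_parse_stringop_strategy_string into

     input_ranges[0] = { 16,   rep_prefix_8_byte, true  }   ("rep_8byte", noalign)
     input_ranges[1] = { 2048, vector_loop,       false }   ("vector_loop", align)
     input_ranges[2] = { -1,   libcall,           true  }   ("libcall", noalign)

   which then overwrite the first three entries of the default stringop_algs
   table for the current target.  */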
2885 struct stringop_size_range
2887 int max;
2888 stringop_alg alg;
2889 bool noalign;
2892 static void
2893 ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
2895 const struct stringop_algs *default_algs;
2896 stringop_size_range input_ranges[MAX_STRINGOP_ALGS];
2897 char *curr_range_str, *next_range_str;
2898 int i = 0, n = 0;
2900 if (is_memset)
2901 default_algs = &ix86_cost->memset[TARGET_64BIT != 0];
2902 else
2903 default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
2905 curr_range_str = strategy_str;
2909 int maxs;
2910 char alg_name[128];
2911 char align[16];
2912 next_range_str = strchr (curr_range_str, ',');
2913 if (next_range_str)
2914 *next_range_str++ = '\0';
2916 if (3 != sscanf (curr_range_str, "%20[^:]:%d:%10s",
2917 alg_name, &maxs, align))
2919 error ("wrong arg %s to option %s", curr_range_str,
2920 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2921 return;
2924 if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1))
2926 error ("size ranges of option %s should be increasing",
2927 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2928 return;
2931 for (i = 0; i < last_alg; i++)
2932 if (!strcmp (alg_name, stringop_alg_names[i]))
2933 break;
2935 if (i == last_alg)
2937 error ("wrong stringop strategy name %s specified for option %s",
2938 alg_name,
2939 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2940 return;
2943 input_ranges[n].max = maxs;
2944 input_ranges[n].alg = (stringop_alg) i;
2945 if (!strcmp (align, "align"))
2946 input_ranges[n].noalign = false;
2947 else if (!strcmp (align, "noalign"))
2948 input_ranges[n].noalign = true;
2949 else
2951 error ("unknown alignment %s specified for option %s",
2952 align, is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2953 return;
2955 n++;
2956 curr_range_str = next_range_str;
2958 while (curr_range_str);
2960 if (input_ranges[n - 1].max != -1)
2962 error ("the max value for the last size range should be -1"
2963 " for option %s",
2964 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2965 return;
2968 if (n > MAX_STRINGOP_ALGS)
2970 error ("too many size ranges specified in option %s",
2971 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2972 return;
2975 /* Now override the default algs array. */
2976 for (i = 0; i < n; i++)
2978 *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max;
2979 *const_cast<stringop_alg *>(&default_algs->size[i].alg)
2980 = input_ranges[i].alg;
2981 *const_cast<int *>(&default_algs->size[i].noalign)
2982 = input_ranges[i].noalign;
2987 /* Parse the -mtune-ctrl= option. When DUMP is true,
2988 print the features that are explicitly set. */
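/* Example added for illustration (the feature names are assumed to come
   from x86-tune.def): -mtune-ctrl=use_leave,^avx128_optimal would set the
   "use_leave" feature and clear "avx128_optimal", since a leading '^'
   negates the named feature in the loop below.  */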
2990 static void
2991 parse_mtune_ctrl_str (bool dump)
2993 if (!ix86_tune_ctrl_string)
2994 return;
2996 char *next_feature_string = NULL;
2997 char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
2998 char *orig = curr_feature_string;
2999 int i;
3002 bool clear = false;
3004 next_feature_string = strchr (curr_feature_string, ',');
3005 if (next_feature_string)
3006 *next_feature_string++ = '\0';
3007 if (*curr_feature_string == '^')
3009 curr_feature_string++;
3010 clear = true;
3012 for (i = 0; i < X86_TUNE_LAST; i++)
3014 if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
3016 ix86_tune_features[i] = !clear;
3017 if (dump)
3018 fprintf (stderr, "Explicitly %s feature %s\n",
3019 clear ? "clear" : "set", ix86_tune_feature_names[i]);
3020 break;
3023 if (i == X86_TUNE_LAST)
3024 error ("Unknown parameter to option -mtune-ctrl: %s",
3025 clear ? curr_feature_string - 1 : curr_feature_string);
3026 curr_feature_string = next_feature_string;
3028 while (curr_feature_string);
3029 free (orig);
3032 /* Helper function to set ix86_tune_features. IX86_TUNE is the
3033 processor type. */
3035 static void
3036 set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
3038 unsigned int ix86_tune_mask = 1u << ix86_tune;
3039 int i;
3041 for (i = 0; i < X86_TUNE_LAST; ++i)
3043 if (ix86_tune_no_default)
3044 ix86_tune_features[i] = 0;
3045 else
3046 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3049 if (dump)
3051 fprintf (stderr, "List of x86 specific tuning parameter names:\n");
3052 for (i = 0; i < X86_TUNE_LAST; i++)
3053 fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
3054 ix86_tune_features[i] ? "on" : "off");
3057 parse_mtune_ctrl_str (dump);
3061 /* Override various settings based on options. If MAIN_ARGS_P, the
3062 options are from the command line, otherwise they are from
3063 attributes. */
3065 static void
3066 ix86_option_override_internal (bool main_args_p,
3067 struct gcc_options *opts,
3068 struct gcc_options *opts_set)
3070 int i;
3071 unsigned int ix86_arch_mask;
3072 const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
3073 const char *prefix;
3074 const char *suffix;
3075 const char *sw;
3077 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
3078 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
3079 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
3080 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
3081 #define PTA_AES (HOST_WIDE_INT_1 << 4)
3082 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
3083 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
3084 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
3085 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
3086 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
3087 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
3088 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
3089 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
3090 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
3091 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
3092 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
3093 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
3094 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
3095 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
3096 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
3097 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
3098 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
3099 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
3100 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
3101 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
3102 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
3103 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
3104 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
3105 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
3106 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
3107 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
3108 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
3109 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
3110 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
3111 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
3112 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
3113 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
3114 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
3115 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
3116 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
3117 #define PTA_AVX512F (HOST_WIDE_INT_1 << 40)
3118 #define PTA_AVX512ER (HOST_WIDE_INT_1 << 41)
3119 #define PTA_AVX512PF (HOST_WIDE_INT_1 << 42)
3120 #define PTA_AVX512CD (HOST_WIDE_INT_1 << 43)
3121 #define PTA_SHA (HOST_WIDE_INT_1 << 45)
3122 #define PTA_PREFETCHWT1 (HOST_WIDE_INT_1 << 46)
3123 #define PTA_CLFLUSHOPT (HOST_WIDE_INT_1 << 47)
3124 #define PTA_XSAVEC (HOST_WIDE_INT_1 << 48)
3125 #define PTA_XSAVES (HOST_WIDE_INT_1 << 49)
3127 #define PTA_CORE2 \
3128 (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
3129 | PTA_CX16 | PTA_FXSR)
3130 #define PTA_NEHALEM \
3131 (PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT)
3132 #define PTA_WESTMERE \
3133 (PTA_NEHALEM | PTA_AES | PTA_PCLMUL)
3134 #define PTA_SANDYBRIDGE \
3135 (PTA_WESTMERE | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT)
3136 #define PTA_IVYBRIDGE \
3137 (PTA_SANDYBRIDGE | PTA_FSGSBASE | PTA_RDRND | PTA_F16C)
3138 #define PTA_HASWELL \
3139 (PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT \
3140 | PTA_FMA | PTA_MOVBE | PTA_HLE)
3141 #define PTA_BROADWELL \
3142 (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED)
3143 #define PTA_BONNELL \
3144 (PTA_CORE2 | PTA_MOVBE)
3145 #define PTA_SILVERMONT \
3146 (PTA_WESTMERE | PTA_MOVBE)
3148 /* If this reaches 64, the flags field in struct pta below needs to be widened. */
3150 static struct pta
3152 const char *const name; /* processor name or nickname. */
3153 const enum processor_type processor;
3154 const enum attr_cpu schedule;
3155 const unsigned HOST_WIDE_INT flags;
3157 const processor_alias_table[] =
3159 {"i386", PROCESSOR_I386, CPU_NONE, 0},
3160 {"i486", PROCESSOR_I486, CPU_NONE, 0},
3161 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3162 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3163 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3164 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3165 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3166 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3167 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3168 PTA_MMX | PTA_SSE | PTA_FXSR},
3169 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3170 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3171 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
3172 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3173 PTA_MMX | PTA_SSE | PTA_FXSR},
3174 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3175 PTA_MMX | PTA_SSE | PTA_FXSR},
3176 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3177 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3178 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3179 PTA_MMX |PTA_SSE | PTA_SSE2 | PTA_FXSR},
3180 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3181 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3182 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3183 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3184 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3185 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3186 | PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
3187 {"core2", PROCESSOR_CORE2, CPU_CORE2, PTA_CORE2},
3188 {"nehalem", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3189 {"corei7", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3190 {"westmere", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_WESTMERE},
3191 {"sandybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3192 PTA_SANDYBRIDGE},
3193 {"corei7-avx", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3194 PTA_SANDYBRIDGE},
3195 {"ivybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3196 PTA_IVYBRIDGE},
3197 {"core-avx-i", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3198 PTA_IVYBRIDGE},
3199 {"haswell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3200 {"core-avx2", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3201 {"broadwell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_BROADWELL},
3202 {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3203 {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3204 {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3205 {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3206 {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
3207 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3208 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3209 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3210 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3211 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3212 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3213 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3214 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3215 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3216 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3217 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3218 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3219 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3220 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3221 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3222 {"x86-64", PROCESSOR_K8, CPU_K8,
3223 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3224 {"k8", PROCESSOR_K8, CPU_K8,
3225 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3226 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3227 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3228 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3229 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3230 {"opteron", PROCESSOR_K8, CPU_K8,
3231 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3232 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3233 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3234 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3235 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3236 {"athlon64", PROCESSOR_K8, CPU_K8,
3237 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3238 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3239 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3240 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3241 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3242 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3243 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3244 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3245 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3246 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3247 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3248 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3249 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3250 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3251 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3252 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3253 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3254 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3255 | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3256 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3257 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3258 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3259 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3260 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3261 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3262 {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
3263 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3264 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3265 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3266 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3267 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
3268 | PTA_XSAVEOPT | PTA_FSGSBASE},
3269 {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
3270 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3271 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3272 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
3273 | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
3274 | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
3275 | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE},
3276 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
3277 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3278 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_PRFCHW
3279 | PTA_FXSR | PTA_XSAVE},
3280 {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
3281 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3282 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_SSE4_1
3283 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3284 | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
3285 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
3287 {"generic", PROCESSOR_GENERIC, CPU_GENERIC,
3288 PTA_64BIT
3289 | PTA_HLE /* flags are only used for -march switch. */ },
3292 /* -mrecip options. */
3293 static struct
3295 const char *string; /* option name */
3296 unsigned int mask; /* mask bits to set */
3298 const recip_options[] =
3300 { "all", RECIP_MASK_ALL },
3301 { "none", RECIP_MASK_NONE },
3302 { "div", RECIP_MASK_DIV },
3303 { "sqrt", RECIP_MASK_SQRT },
3304 { "vec-div", RECIP_MASK_VEC_DIV },
3305 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
3308 int const pta_size = ARRAY_SIZE (processor_alias_table);
3310 /* Set up prefix/suffix so the error messages refer to either the command
3311 line argument or the attribute(target). */
3312 if (main_args_p)
3314 prefix = "-m";
3315 suffix = "";
3316 sw = "switch";
3318 else
3320 prefix = "option(\"";
3321 suffix = "\")";
3322 sw = "attribute";
3325 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3326 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3327 if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3328 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
3329 #ifdef TARGET_BI_ARCH
3330 else
3332 #if TARGET_BI_ARCH == 1
3333 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3334 is on and OPTION_MASK_ABI_X32 is off. We turn off
3335 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3336 -mx32. */
3337 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3338 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3339 #else
3340 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3341 on and OPTION_MASK_ABI_64 is off. We turn off
3342 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3343 -m64. */
3344 if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3345 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3346 #endif
3348 #endif
3350 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3352 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3353 OPTION_MASK_ABI_64 for TARGET_X32. */
3354 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3355 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3357 else if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
3358 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT
3359 | OPTION_MASK_ABI_X32
3360 | OPTION_MASK_ABI_64);
3361 else if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3363 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3364 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3365 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3366 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3369 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3370 SUBTARGET_OVERRIDE_OPTIONS;
3371 #endif
3373 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3374 SUBSUBTARGET_OVERRIDE_OPTIONS;
3375 #endif
3377 /* -fPIC is the default for x86_64. */
3378 if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags))
3379 opts->x_flag_pic = 2;
3381 /* Need to check -mtune=generic first. */
3382 if (opts->x_ix86_tune_string)
3384 /* As special support for cross compilers, we treat -mtune=native
3385 as -mtune=generic. With native compilers we won't see
3386 -mtune=native, since the driver will already have replaced it. */
3387 if (!strcmp (opts->x_ix86_tune_string, "native"))
3389 opts->x_ix86_tune_string = "generic";
3391 else if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3392 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3393 "%stune=k8%s or %stune=generic%s instead as appropriate",
3394 prefix, suffix, prefix, suffix, prefix, suffix);
3396 else
3398 if (opts->x_ix86_arch_string)
3399 opts->x_ix86_tune_string = opts->x_ix86_arch_string;
3400 if (!opts->x_ix86_tune_string)
3402 opts->x_ix86_tune_string
3403 = processor_target_table[TARGET_CPU_DEFAULT].name;
3404 ix86_tune_defaulted = 1;
3407 /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
3408 or defaulted. We need to use a sensible tune option. */
3409 if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3411 opts->x_ix86_tune_string = "generic";
3415 if (opts->x_ix86_stringop_alg == rep_prefix_8_byte
3416 && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3418 /* rep; movq isn't available in 32-bit code. */
3419 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3420 opts->x_ix86_stringop_alg = no_stringop;
3423 if (!opts->x_ix86_arch_string)
3424 opts->x_ix86_arch_string
3425 = TARGET_64BIT_P (opts->x_ix86_isa_flags)
3426 ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3427 else
3428 ix86_arch_specified = 1;
3430 if (opts_set->x_ix86_pmode)
3432 if ((TARGET_LP64_P (opts->x_ix86_isa_flags)
3433 && opts->x_ix86_pmode == PMODE_SI)
3434 || (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
3435 && opts->x_ix86_pmode == PMODE_DI))
3436 error ("address mode %qs not supported in the %s bit mode",
3437 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long",
3438 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32");
3440 else
3441 opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags)
3442 ? PMODE_DI : PMODE_SI;
3444 if (!opts_set->x_ix86_abi)
3445 opts->x_ix86_abi = DEFAULT_ABI;
3447 /* For targets using the MS ABI, enable ms-extensions unless they were
3448 explicitly turned off. For non-MS ABI targets we turn this
3449 option off. */
3450 if (!opts_set->x_flag_ms_extensions)
3451 opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI);
3453 if (opts_set->x_ix86_cmodel)
3455 switch (opts->x_ix86_cmodel)
3457 case CM_SMALL:
3458 case CM_SMALL_PIC:
3459 if (opts->x_flag_pic)
3460 opts->x_ix86_cmodel = CM_SMALL_PIC;
3461 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3462 error ("code model %qs not supported in the %s bit mode",
3463 "small", "32");
3464 break;
3466 case CM_MEDIUM:
3467 case CM_MEDIUM_PIC:
3468 if (opts->x_flag_pic)
3469 opts->x_ix86_cmodel = CM_MEDIUM_PIC;
3470 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3471 error ("code model %qs not supported in the %s bit mode",
3472 "medium", "32");
3473 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3474 error ("code model %qs not supported in x32 mode",
3475 "medium");
3476 break;
3478 case CM_LARGE:
3479 case CM_LARGE_PIC:
3480 if (opts->x_flag_pic)
3481 opts->x_ix86_cmodel = CM_LARGE_PIC;
3482 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3483 error ("code model %qs not supported in the %s bit mode",
3484 "large", "32");
3485 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3486 error ("code model %qs not supported in x32 mode",
3487 "large");
3488 break;
3490 case CM_32:
3491 if (opts->x_flag_pic)
3492 error ("code model %s does not support PIC mode", "32");
3493 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3494 error ("code model %qs not supported in the %s bit mode",
3495 "32", "64");
3496 break;
3498 case CM_KERNEL:
3499 if (opts->x_flag_pic)
3501 error ("code model %s does not support PIC mode", "kernel");
3502 opts->x_ix86_cmodel = CM_32;
3504 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3505 error ("code model %qs not supported in the %s bit mode",
3506 "kernel", "32");
3507 break;
3509 default:
3510 gcc_unreachable ();
3513 else
3515 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3516 use of rip-relative addressing. This eliminates fixups that
3517 would otherwise be needed if this object is to be placed in a
3518 DLL, and is essentially just as efficient as direct addressing. */
3519 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3520 && (TARGET_RDOS || TARGET_PECOFF))
3521 opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1;
3522 else if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3523 opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL;
3524 else
3525 opts->x_ix86_cmodel = CM_32;
3527 if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL)
3529 error ("-masm=intel not supported in this configuration");
3530 opts->x_ix86_asm_dialect = ASM_ATT;
3532 if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0)
3533 != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3534 sorry ("%i-bit mode not compiled in",
3535 (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3537 for (i = 0; i < pta_size; i++)
3538 if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name))
3540 ix86_schedule = processor_alias_table[i].schedule;
3541 ix86_arch = processor_alias_table[i].processor;
3542 /* Default cpu tuning to the architecture. */
3543 ix86_tune = ix86_arch;
3545 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3546 && !(processor_alias_table[i].flags & PTA_64BIT))
3547 error ("CPU you selected does not support x86-64 "
3548 "instruction set");
3550 if (processor_alias_table[i].flags & PTA_MMX
3551 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3552 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3553 if (processor_alias_table[i].flags & PTA_3DNOW
3554 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3555 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3556 if (processor_alias_table[i].flags & PTA_3DNOW_A
3557 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3558 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3559 if (processor_alias_table[i].flags & PTA_SSE
3560 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3561 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3562 if (processor_alias_table[i].flags & PTA_SSE2
3563 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3564 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3565 if (processor_alias_table[i].flags & PTA_SSE3
3566 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3567 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3568 if (processor_alias_table[i].flags & PTA_SSSE3
3569 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3570 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3571 if (processor_alias_table[i].flags & PTA_SSE4_1
3572 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3573 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3574 if (processor_alias_table[i].flags & PTA_SSE4_2
3575 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3576 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3577 if (processor_alias_table[i].flags & PTA_AVX
3578 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3579 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3580 if (processor_alias_table[i].flags & PTA_AVX2
3581 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3582 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3583 if (processor_alias_table[i].flags & PTA_FMA
3584 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3585 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3586 if (processor_alias_table[i].flags & PTA_SSE4A
3587 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3588 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3589 if (processor_alias_table[i].flags & PTA_FMA4
3590 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3591 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3592 if (processor_alias_table[i].flags & PTA_XOP
3593 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3594 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3595 if (processor_alias_table[i].flags & PTA_LWP
3596 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3597 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3598 if (processor_alias_table[i].flags & PTA_ABM
3599 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3600 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3601 if (processor_alias_table[i].flags & PTA_BMI
3602 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3603 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3604 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3605 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3606 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3607 if (processor_alias_table[i].flags & PTA_TBM
3608 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3609 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3610 if (processor_alias_table[i].flags & PTA_BMI2
3611 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
3612 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
3613 if (processor_alias_table[i].flags & PTA_CX16
3614 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3615 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3616 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3617 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3618 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3619 if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags)
3620 && (processor_alias_table[i].flags & PTA_NO_SAHF))
3621 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3622 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3623 if (processor_alias_table[i].flags & PTA_MOVBE
3624 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3625 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3626 if (processor_alias_table[i].flags & PTA_AES
3627 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3628 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AES;
3629 if (processor_alias_table[i].flags & PTA_SHA
3630 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
3631 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SHA;
3632 if (processor_alias_table[i].flags & PTA_PCLMUL
3633 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3634 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3635 if (processor_alias_table[i].flags & PTA_FSGSBASE
3636 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3637 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3638 if (processor_alias_table[i].flags & PTA_RDRND
3639 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3640 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3641 if (processor_alias_table[i].flags & PTA_F16C
3642 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3643 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3644 if (processor_alias_table[i].flags & PTA_RTM
3645 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
3646 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM;
3647 if (processor_alias_table[i].flags & PTA_HLE
3648 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
3649 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_HLE;
3650 if (processor_alias_table[i].flags & PTA_PRFCHW
3651 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
3652 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
3653 if (processor_alias_table[i].flags & PTA_RDSEED
3654 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
3655 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
3656 if (processor_alias_table[i].flags & PTA_ADX
3657 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
3658 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX;
3659 if (processor_alias_table[i].flags & PTA_FXSR
3660 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
3661 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
3662 if (processor_alias_table[i].flags & PTA_XSAVE
3663 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
3664 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
3665 if (processor_alias_table[i].flags & PTA_XSAVEOPT
3666 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
3667 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
3668 if (processor_alias_table[i].flags & PTA_AVX512F
3669 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
3670 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
3671 if (processor_alias_table[i].flags & PTA_AVX512ER
3672 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
3673 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
3674 if (processor_alias_table[i].flags & PTA_AVX512PF
3675 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
3676 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
3677 if (processor_alias_table[i].flags & PTA_AVX512CD
3678 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
3679 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
3680 if (processor_alias_table[i].flags & PTA_PREFETCHWT1
3681 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1))
3682 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1;
3683 if (processor_alias_table[i].flags & PTA_CLFLUSHOPT
3684 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLFLUSHOPT))
3685 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLFLUSHOPT;
3686 if (processor_alias_table[i].flags & PTA_XSAVEC
3687 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEC))
3688 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEC;
3689 if (processor_alias_table[i].flags & PTA_XSAVES
3690 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVES))
3691 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVES;
3692 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3693 x86_prefetch_sse = true;
3695 break;
3698 if (!strcmp (opts->x_ix86_arch_string, "generic"))
3699 error ("generic CPU can be used only for %stune=%s %s",
3700 prefix, suffix, sw);
3701 else if (!strcmp (opts->x_ix86_arch_string, "intel"))
3702 error ("intel CPU can be used only for %stune=%s %s",
3703 prefix, suffix, sw);
3704 else if (i == pta_size)
3705 error ("bad value (%s) for %sarch=%s %s",
3706 opts->x_ix86_arch_string, prefix, suffix, sw);
3708 ix86_arch_mask = 1u << ix86_arch;
3709 for (i = 0; i < X86_ARCH_LAST; ++i)
3710 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3712 for (i = 0; i < pta_size; i++)
3713 if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name))
3715 ix86_schedule = processor_alias_table[i].schedule;
3716 ix86_tune = processor_alias_table[i].processor;
3717 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3719 if (!(processor_alias_table[i].flags & PTA_64BIT))
3721 if (ix86_tune_defaulted)
3723 opts->x_ix86_tune_string = "x86-64";
3724 for (i = 0; i < pta_size; i++)
3725 if (! strcmp (opts->x_ix86_tune_string,
3726 processor_alias_table[i].name))
3727 break;
3728 ix86_schedule = processor_alias_table[i].schedule;
3729 ix86_tune = processor_alias_table[i].processor;
3731 else
3732 error ("CPU you selected does not support x86-64 "
3733 "instruction set");
3736 /* Intel CPUs have always interpreted SSE prefetch instructions as
3737 NOPs, so we can enable SSE prefetch instructions even when
3738 -mtune (rather than -march) points us to a processor that has them.
3739 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3740 higher processors. */
3741 if (TARGET_CMOV
3742 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3743 x86_prefetch_sse = true;
3744 break;
3747 if (ix86_tune_specified && i == pta_size)
3748 error ("bad value (%s) for %stune=%s %s",
3749 opts->x_ix86_tune_string, prefix, suffix, sw);
3751 set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes);
3753 #ifndef USE_IX86_FRAME_POINTER
3754 #define USE_IX86_FRAME_POINTER 0
3755 #endif
3757 #ifndef USE_X86_64_FRAME_POINTER
3758 #define USE_X86_64_FRAME_POINTER 0
3759 #endif
3761 /* Set the default values for switches whose default depends on TARGET_64BIT
3762 in case they weren't overwritten by command line options. */
3763 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3765 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3766 opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3767 if (opts->x_flag_asynchronous_unwind_tables
3768 && !opts_set->x_flag_unwind_tables
3769 && TARGET_64BIT_MS_ABI)
3770 opts->x_flag_unwind_tables = 1;
3771 if (opts->x_flag_asynchronous_unwind_tables == 2)
3772 opts->x_flag_unwind_tables
3773 = opts->x_flag_asynchronous_unwind_tables = 1;
3774 if (opts->x_flag_pcc_struct_return == 2)
3775 opts->x_flag_pcc_struct_return = 0;
3777 else
3779 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3780 opts->x_flag_omit_frame_pointer
3781 = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size);
3782 if (opts->x_flag_asynchronous_unwind_tables == 2)
3783 opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3784 if (opts->x_flag_pcc_struct_return == 2)
3785 opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3788 ix86_tune_cost = processor_target_table[ix86_tune].cost;
3789 if (opts->x_optimize_size)
3790 ix86_cost = &ix86_size_cost;
3791 else
3792 ix86_cost = ix86_tune_cost;
3794 /* Arrange to set up i386_stack_locals for all functions. */
3795 init_machine_status = ix86_init_machine_status;
3797 /* Validate -mregparm= value. */
3798 if (opts_set->x_ix86_regparm)
3800 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3801 warning (0, "-mregparm is ignored in 64-bit mode");
3802 if (opts->x_ix86_regparm > REGPARM_MAX)
3804 error ("-mregparm=%d is not between 0 and %d",
3805 opts->x_ix86_regparm, REGPARM_MAX);
3806 opts->x_ix86_regparm = 0;
3809 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3810 opts->x_ix86_regparm = REGPARM_MAX;
3812 /* Default align_* from the processor table. */
3813 if (opts->x_align_loops == 0)
3815 opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
3816 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3818 if (opts->x_align_jumps == 0)
3820 opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
3821 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3823 if (opts->x_align_functions == 0)
3825 opts->x_align_functions = processor_target_table[ix86_tune].align_func;
3828 /* Provide default for -mbranch-cost= value. */
3829 if (!opts_set->x_ix86_branch_cost)
3830 opts->x_ix86_branch_cost = ix86_cost->branch_cost;
3832 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3834 opts->x_target_flags
3835 |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
3837 /* Enable the SSE and MMX builtins by default. Do allow the user to
3838 explicitly disable any of these; in particular, disabling SSE and
3839 MMX for kernel code is extremely useful. */
3840 if (!ix86_arch_specified)
3841 opts->x_ix86_isa_flags
3842 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3843 | TARGET_SUBTARGET64_ISA_DEFAULT)
3844 & ~opts->x_ix86_isa_flags_explicit);
3846 if (TARGET_RTD_P (opts->x_target_flags))
3847 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3849 else
3851 opts->x_target_flags
3852 |= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags;
3854 if (!ix86_arch_specified)
3855 opts->x_ix86_isa_flags
3856 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
3858 /* The i386 ABI does not specify a red zone. It still makes sense to use
3859 one when the programmer takes care to keep the stack from being destroyed. */
3860 if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE))
3861 opts->x_target_flags |= MASK_NO_RED_ZONE;
3864 /* Keep nonleaf frame pointers. */
3865 if (opts->x_flag_omit_frame_pointer)
3866 opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3867 else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
3868 opts->x_flag_omit_frame_pointer = 1;
3870 /* If we're doing fast math, we don't care about comparison order
3871 wrt NaNs. This lets us use a shorter comparison sequence. */
3872 if (opts->x_flag_finite_math_only)
3873 opts->x_target_flags &= ~MASK_IEEE_FP;
3875 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3876 since the insns won't need emulation. */
3877 if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387])
3878 opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387;
3880 /* Likewise, if the target doesn't have a 387, or we've specified
3881 software floating point, don't use 387 inline intrinsics. */
3882 if (!TARGET_80387_P (opts->x_target_flags))
3883 opts->x_target_flags |= MASK_NO_FANCY_MATH_387;
3885 /* Turn on MMX builtins for -msse. */
3886 if (TARGET_SSE_P (opts->x_ix86_isa_flags))
3887 opts->x_ix86_isa_flags
3888 |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
3890 /* Enable SSE prefetch. */
3891 if (TARGET_SSE_P (opts->x_ix86_isa_flags)
3892 || (TARGET_PRFCHW && !TARGET_3DNOW_P (opts->x_ix86_isa_flags)))
3893 x86_prefetch_sse = true;
3895 /* Enable prefetch{,w} instructions for -m3dnow and -mprefetchwt1. */
3896 if (TARGET_3DNOW_P (opts->x_ix86_isa_flags)
3897 || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags))
3898 opts->x_ix86_isa_flags
3899 |= OPTION_MASK_ISA_PRFCHW & ~opts->x_ix86_isa_flags_explicit;
3901 /* Enable popcnt instruction for -msse4.2 or -mabm. */
3902 if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)
3903 || TARGET_ABM_P (opts->x_ix86_isa_flags))
3904 opts->x_ix86_isa_flags
3905 |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit;
3907 /* Enable lzcnt instruction for -mabm. */
3908 if (TARGET_ABM_P(opts->x_ix86_isa_flags))
3909 opts->x_ix86_isa_flags
3910 |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit;
3912 /* Validate -mpreferred-stack-boundary= value or default it to
3913 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3914 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
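/* The -mpreferred-stack-boundary= argument is the log2 of the boundary in
   bytes; e.g. a value of 4 requests a 2^4 = 16-byte (128-bit) boundary, as
   computed by the (1 << arg) * BITS_PER_UNIT expression below.  */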
3915 if (opts_set->x_ix86_preferred_stack_boundary_arg)
3917 int min = (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3918 ? (TARGET_SSE_P (opts->x_ix86_isa_flags) ? 4 : 3) : 2);
3919 int max = (TARGET_SEH ? 4 : 12);
3921 if (opts->x_ix86_preferred_stack_boundary_arg < min
3922 || opts->x_ix86_preferred_stack_boundary_arg > max)
3924 if (min == max)
3925 error ("-mpreferred-stack-boundary is not supported "
3926 "for this target");
3927 else
3928 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
3929 opts->x_ix86_preferred_stack_boundary_arg, min, max);
3931 else
3932 ix86_preferred_stack_boundary
3933 = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
3936 /* Set the default value for -mstackrealign. */
3937 if (opts->x_ix86_force_align_arg_pointer == -1)
3938 opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3940 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3942 /* Validate -mincoming-stack-boundary= value or default it to
3943 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3944 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3945 if (opts_set->x_ix86_incoming_stack_boundary_arg)
3947 if (opts->x_ix86_incoming_stack_boundary_arg
3948 < (TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2)
3949 || opts->x_ix86_incoming_stack_boundary_arg > 12)
3950 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3951 opts->x_ix86_incoming_stack_boundary_arg,
3952 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2);
3953 else
3955 ix86_user_incoming_stack_boundary
3956 = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
3957 ix86_incoming_stack_boundary
3958 = ix86_user_incoming_stack_boundary;
3962 /* Accept -msseregparm only if at least SSE support is enabled. */
3963 if (TARGET_SSEREGPARM_P (opts->x_target_flags)
3964 && ! TARGET_SSE_P (opts->x_ix86_isa_flags))
3965 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3967 if (opts_set->x_ix86_fpmath)
3969 if (opts->x_ix86_fpmath & FPMATH_SSE)
3971 if (!TARGET_SSE_P (opts->x_ix86_isa_flags))
3973 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3974 opts->x_ix86_fpmath = FPMATH_387;
3976 else if ((opts->x_ix86_fpmath & FPMATH_387)
3977 && !TARGET_80387_P (opts->x_target_flags))
3979 warning (0, "387 instruction set disabled, using SSE arithmetics");
3980 opts->x_ix86_fpmath = FPMATH_SSE;
3984 /* For all chips supporting SSE2, -mfpmath=sse performs better than
3985 fpmath=387. The latter is nevertheless the default on many targets,
3986 since the extra 80-bit precision of temporaries is considered part of the ABI.
3987 Overwrite the default at least for -ffast-math.
3988 TODO: -mfpmath=both seems to produce code of the same performance with
3989 slightly smaller binaries. It is however not clear whether register
3990 allocation is ready for this setting.
3991 Also, -mfpmath=387 codegen is overall a lot more compact (about 4-5%) than
3992 SSE codegen. We may switch to 387 with -ffast-math for size-optimized
3993 functions. */
3994 else if (fast_math_flags_set_p (&global_options)
3995 && TARGET_SSE2_P (opts->x_ix86_isa_flags))
3996 opts->x_ix86_fpmath = FPMATH_SSE;
3997 else
3998 opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags);
4000 /* If the i387 is disabled, then do not return values in it. */
4001 if (!TARGET_80387_P (opts->x_target_flags))
4002 opts->x_target_flags &= ~MASK_FLOAT_RETURNS;
4004 /* Use external vectorized library in vectorizing intrinsics. */
4005 if (opts_set->x_ix86_veclibabi_type)
4006 switch (opts->x_ix86_veclibabi_type)
4008 case ix86_veclibabi_type_svml:
4009 ix86_veclib_handler = ix86_veclibabi_svml;
4010 break;
4012 case ix86_veclibabi_type_acml:
4013 ix86_veclib_handler = ix86_veclibabi_acml;
4014 break;
4016 default:
4017 gcc_unreachable ();
4020 if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS]
4021 && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4022 && !opts->x_optimize_size)
4023 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4025 /* If stack probes are required, the space used for large function
4026 arguments on the stack must also be probed, so enable
4027 -maccumulate-outgoing-args so this happens in the prologue. */
4028 if (TARGET_STACK_PROBE_P (opts->x_target_flags)
4029 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4031 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4032 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
4033 "for correctness", prefix, suffix);
4034 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4037 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
4039 char *p;
4040 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
4041 p = strchr (internal_label_prefix, 'X');
4042 internal_label_prefix_len = p - internal_label_prefix;
4043 *p = '\0';
4046 /* When no scheduling description is available, disable the scheduler pass
4047 so it won't slow down compilation and make x87 code slower. */
4048 if (!TARGET_SCHEDULE)
4049 opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0;
4051 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4052 ix86_tune_cost->simultaneous_prefetches,
4053 opts->x_param_values,
4054 opts_set->x_param_values);
4055 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4056 ix86_tune_cost->prefetch_block,
4057 opts->x_param_values,
4058 opts_set->x_param_values);
4059 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
4060 ix86_tune_cost->l1_cache_size,
4061 opts->x_param_values,
4062 opts_set->x_param_values);
4063 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
4064 ix86_tune_cost->l2_cache_size,
4065 opts->x_param_values,
4066 opts_set->x_param_values);
4068 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
4069 if (opts->x_flag_prefetch_loop_arrays < 0
4070 && HAVE_prefetch
4071 && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
4072 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
4073 opts->x_flag_prefetch_loop_arrays = 1;
4075 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
4076 can be optimized to ap = __builtin_next_arg (0). */
4077 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack)
4078 targetm.expand_builtin_va_start = NULL;
4080 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4082 ix86_gen_leave = gen_leave_rex64;
4083 if (Pmode == DImode)
4085 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
4086 ix86_gen_tls_local_dynamic_base_64
4087 = gen_tls_local_dynamic_base_64_di;
4089 else
4091 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
4092 ix86_gen_tls_local_dynamic_base_64
4093 = gen_tls_local_dynamic_base_64_si;
4096 else
4097 ix86_gen_leave = gen_leave;
4099 if (Pmode == DImode)
4101 ix86_gen_add3 = gen_adddi3;
4102 ix86_gen_sub3 = gen_subdi3;
4103 ix86_gen_sub3_carry = gen_subdi3_carry;
4104 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
4105 ix86_gen_andsp = gen_anddi3;
4106 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
4107 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
4108 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
4109 ix86_gen_monitor = gen_sse3_monitor_di;
4111 else
4113 ix86_gen_add3 = gen_addsi3;
4114 ix86_gen_sub3 = gen_subsi3;
4115 ix86_gen_sub3_carry = gen_subsi3_carry;
4116 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
4117 ix86_gen_andsp = gen_andsi3;
4118 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
4119 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
4120 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
4121 ix86_gen_monitor = gen_sse3_monitor_si;
4124 #ifdef USE_IX86_CLD
4125 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4126 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4127 opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags;
4128 #endif
4130 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic)
4132 if (opts->x_flag_fentry > 0)
4133 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4134 "with -fpic");
4135 opts->x_flag_fentry = 0;
4137 else if (TARGET_SEH)
4139 if (opts->x_flag_fentry == 0)
4140 sorry ("-mno-fentry isn%'t compatible with SEH");
4141 opts->x_flag_fentry = 1;
4143 else if (opts->x_flag_fentry < 0)
4145 #if defined(PROFILE_BEFORE_PROLOGUE)
4146 opts->x_flag_fentry = 1;
4147 #else
4148 opts->x_flag_fentry = 0;
4149 #endif
4152 /* When not optimizing for size, enable the vzeroupper optimization for
4153 TARGET_AVX with -fexpensive-optimizations and split 32-byte
4154 AVX unaligned loads/stores. */
4155 if (!opts->x_optimize_size)
4157 if (flag_expensive_optimizations
4158 && !(opts_set->x_target_flags & MASK_VZEROUPPER))
4159 opts->x_target_flags |= MASK_VZEROUPPER;
4160 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
4161 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
4162 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
4163 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
4164 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
4165 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
4166 /* Enable 128-bit AVX instruction generation
4167 for the auto-vectorizer. */
4168 if (TARGET_AVX128_OPTIMAL
4169 && !(opts_set->x_target_flags & MASK_PREFER_AVX128))
4170 opts->x_target_flags |= MASK_PREFER_AVX128;
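/* -mrecip= takes a comma-separated list of the names in recip_options
   above; a leading '!' inverts an entry.  For instance, -mrecip=all,!sqrt
   enables every reciprocal approximation except the scalar sqrt one
   (an illustrative example of the syntax parsed below).  */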
4173 if (opts->x_ix86_recip_name)
4175 char *p = ASTRDUP (opts->x_ix86_recip_name);
4176 char *q;
4177 unsigned int mask, i;
4178 bool invert;
4180 while ((q = strtok (p, ",")) != NULL)
4182 p = NULL;
4183 if (*q == '!')
4185 invert = true;
4186 q++;
4188 else
4189 invert = false;
4191 if (!strcmp (q, "default"))
4192 mask = RECIP_MASK_ALL;
4193 else
4195 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4196 if (!strcmp (q, recip_options[i].string))
4198 mask = recip_options[i].mask;
4199 break;
4202 if (i == ARRAY_SIZE (recip_options))
4204 error ("unknown option for -mrecip=%s", q);
4205 invert = false;
4206 mask = RECIP_MASK_NONE;
4210 opts->x_recip_mask_explicit |= mask;
4211 if (invert)
4212 opts->x_recip_mask &= ~mask;
4213 else
4214 opts->x_recip_mask |= mask;
4218 if (TARGET_RECIP_P (opts->x_target_flags))
4219 opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit;
4220 else if (opts_set->x_target_flags & MASK_RECIP)
4221 opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit);
4223 /* Default long double to 64-bit for 32-bit Bionic and to __float128
4224 for 64-bit Bionic. */
4225 if (TARGET_HAS_BIONIC
4226 && !(opts_set->x_target_flags
4227 & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128)))
4228 opts->x_target_flags |= (TARGET_64BIT
4229 ? MASK_LONG_DOUBLE_128
4230 : MASK_LONG_DOUBLE_64);
4232 /* Only one of them can be active. */
4233 gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0
4234 || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0);
4236 /* Save the initial options in case the user does function specific
4237 options. */
4238 if (main_args_p)
4239 target_option_default_node = target_option_current_node
4240 = build_target_option_node (opts);
4242 /* Handle stack protector */
4243 if (!opts_set->x_ix86_stack_protector_guard)
4244 opts->x_ix86_stack_protector_guard
4245 = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
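/* Presumably Bionic lacks the TLS-based guard slot, so the default there is
   the global stack-protector guard; other targets default to the TLS guard
   (an explanatory note only; the code above is authoritative).  */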
4247 /* Handle -mmemcpy-strategy= and -mmemset-strategy= */
4248 if (opts->x_ix86_tune_memcpy_strategy)
4250 char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy);
4251 ix86_parse_stringop_strategy_string (str, false);
4252 free (str);
4255 if (opts->x_ix86_tune_memset_strategy)
4257 char *str = xstrdup (opts->x_ix86_tune_memset_strategy);
4258 ix86_parse_stringop_strategy_string (str, true);
4259 free (str);
4263 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4265 static void
4266 ix86_option_override (void)
4268 opt_pass *pass_insert_vzeroupper = make_pass_insert_vzeroupper (g);
4269 static struct register_pass_info insert_vzeroupper_info
4270 = { pass_insert_vzeroupper, "reload",
4271 1, PASS_POS_INSERT_AFTER
4274 ix86_option_override_internal (true, &global_options, &global_options_set);
4277 /* This needs to be done at start up. It's convenient to do it here. */
4278 register_pass (&insert_vzeroupper_info);
4281 /* Update register usage after having seen the compiler flags. */
4283 static void
4284 ix86_conditional_register_usage (void)
4286 int i, c_mask;
4287 unsigned int j;
4289 /* The PIC register, if it exists, is fixed. */
4290 j = PIC_OFFSET_TABLE_REGNUM;
4291 if (j != INVALID_REGNUM)
4292 fixed_regs[j] = call_used_regs[j] = 1;
4294 /* For 32-bit targets, squash the REX registers. */
4295 if (! TARGET_64BIT)
4297 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4298 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4299 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4300 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4301 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4302 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4305 /* See the definition of CALL_USED_REGISTERS in i386.h. */
4306 c_mask = (TARGET_64BIT_MS_ABI ? (1 << 3)
4307 : TARGET_64BIT ? (1 << 2)
4308 : (1 << 1));
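/* Entries greater than 1 in the CALL_USED_REGISTERS initializer encode
   per-ABI call-clobbered bits; c_mask selects the bit for the current ABI
   (1 << 1 for 32-bit, 1 << 2 for 64-bit SysV, 1 << 3 for the 64-bit MS ABI),
   matching the computation above.  */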
4310 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4312 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4314 /* Set/reset conditionally defined registers from
4315 CALL_USED_REGISTERS initializer. */
4316 if (call_used_regs[i] > 1)
4317 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
4319 /* Calculate registers of CLOBBERED_REGS register set
4320 as call used registers from GENERAL_REGS register set. */
4321 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4322 && call_used_regs[i])
4323 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
4326 /* If MMX is disabled, squash the registers. */
4327 if (! TARGET_MMX)
4328 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4329 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4330 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4332 /* If SSE is disabled, squash the registers. */
4333 if (! TARGET_SSE)
4334 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4335 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4336 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4338 /* If the FPU is disabled, squash the registers. */
4339 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4340 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4341 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4342 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4344 /* If AVX512F is disabled, squash the registers. */
4345 if (! TARGET_AVX512F)
4347 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4348 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4350 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
4351 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4356 /* Save the current options */
4358 static void
4359 ix86_function_specific_save (struct cl_target_option *ptr,
4360 struct gcc_options *opts)
4362 ptr->arch = ix86_arch;
4363 ptr->schedule = ix86_schedule;
4364 ptr->tune = ix86_tune;
4365 ptr->branch_cost = ix86_branch_cost;
4366 ptr->tune_defaulted = ix86_tune_defaulted;
4367 ptr->arch_specified = ix86_arch_specified;
4368 ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
4369 ptr->x_ix86_target_flags_explicit = opts->x_ix86_target_flags_explicit;
4370 ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
4371 ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
4372 ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
4373 ptr->x_ix86_cmodel = opts->x_ix86_cmodel;
4374 ptr->x_ix86_abi = opts->x_ix86_abi;
4375 ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect;
4376 ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost;
4377 ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes;
4378 ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer;
4379 ptr->x_ix86_force_drap = opts->x_ix86_force_drap;
4380 ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg;
4381 ptr->x_ix86_pmode = opts->x_ix86_pmode;
4382 ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg;
4383 ptr->x_ix86_recip_name = opts->x_ix86_recip_name;
4384 ptr->x_ix86_regparm = opts->x_ix86_regparm;
4385 ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold;
4386 ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx;
4387 ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard;
4388 ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg;
4389 ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect;
4390 ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string;
4391 ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy;
4392 ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy;
4393 ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default;
4394 ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type;
4396 /* The fields are char but the variables are not; make sure the
4397 values fit in the fields. */
4398 gcc_assert (ptr->arch == ix86_arch);
4399 gcc_assert (ptr->schedule == ix86_schedule);
4400 gcc_assert (ptr->tune == ix86_tune);
4401 gcc_assert (ptr->branch_cost == ix86_branch_cost);
4404 /* Restore the current options */
4406 static void
4407 ix86_function_specific_restore (struct gcc_options *opts,
4408 struct cl_target_option *ptr)
4410 enum processor_type old_tune = ix86_tune;
4411 enum processor_type old_arch = ix86_arch;
4412 unsigned int ix86_arch_mask;
4413 int i;
4415 /* We don't change -fPIC. */
4416 opts->x_flag_pic = flag_pic;
4418 ix86_arch = (enum processor_type) ptr->arch;
4419 ix86_schedule = (enum attr_cpu) ptr->schedule;
4420 ix86_tune = (enum processor_type) ptr->tune;
4421 opts->x_ix86_branch_cost = ptr->branch_cost;
4422 ix86_tune_defaulted = ptr->tune_defaulted;
4423 ix86_arch_specified = ptr->arch_specified;
4424 opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
4425 opts->x_ix86_target_flags_explicit = ptr->x_ix86_target_flags_explicit;
4426 opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
4427 opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
4428 opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
4429 opts->x_ix86_cmodel = ptr->x_ix86_cmodel;
4430 opts->x_ix86_abi = ptr->x_ix86_abi;
4431 opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect;
4432 opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost;
4433 opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes;
4434 opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer;
4435 opts->x_ix86_force_drap = ptr->x_ix86_force_drap;
4436 opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg;
4437 opts->x_ix86_pmode = ptr->x_ix86_pmode;
4438 opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg;
4439 opts->x_ix86_recip_name = ptr->x_ix86_recip_name;
4440 opts->x_ix86_regparm = ptr->x_ix86_regparm;
4441 opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold;
4442 opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx;
4443 opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard;
4444 opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg;
4445 opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect;
4446 opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string;
4447 opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy;
4448 opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy;
4449 opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default;
4450 opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type;
4452 /* Recreate the arch feature tests if the arch changed */
4453 if (old_arch != ix86_arch)
4455 ix86_arch_mask = 1u << ix86_arch;
4456 for (i = 0; i < X86_ARCH_LAST; ++i)
4457 ix86_arch_features[i]
4458 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4461 /* Recreate the tune optimization tests */
4462 if (old_tune != ix86_tune)
4463 set_ix86_tune_features (ix86_tune, false);
4466 /* Print the current options */
4468 static void
4469 ix86_function_specific_print (FILE *file, int indent,
4470 struct cl_target_option *ptr)
4472 char *target_string
4473 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4474 NULL, NULL, ptr->x_ix86_fpmath, false);
4476 gcc_assert (ptr->arch < PROCESSOR_max);
4477 fprintf (file, "%*sarch = %d (%s)\n",
4478 indent, "",
4479 ptr->arch, processor_target_table[ptr->arch].name);
4481 gcc_assert (ptr->tune < PROCESSOR_max);
4482 fprintf (file, "%*stune = %d (%s)\n",
4483 indent, "",
4484 ptr->tune, processor_target_table[ptr->tune].name);
4486 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4488 if (target_string)
4490 fprintf (file, "%*s%s\n", indent, "", target_string);
4491 free (target_string);
4496 /* Inner function to process the attribute((target(...))), take an argument and
4497 set the current options from the argument. If we have a list, recursively go
4498 over the list. */
4500 static bool
4501 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
4502 struct gcc_options *opts,
4503 struct gcc_options *opts_set,
4504 struct gcc_options *enum_opts_set)
4506 char *next_optstr;
4507 bool ret = true;
4509 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4510 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4511 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4512 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4513 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4515 enum ix86_opt_type
4517 ix86_opt_unknown,
4518 ix86_opt_yes,
4519 ix86_opt_no,
4520 ix86_opt_str,
4521 ix86_opt_enum,
4522 ix86_opt_isa
4525 static const struct
4527 const char *string;
4528 size_t len;
4529 enum ix86_opt_type type;
4530 int opt;
4531 int mask;
4532 } attrs[] = {
4533 /* isa options */
4534 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4535 IX86_ATTR_ISA ("abm", OPT_mabm),
4536 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4537 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
4538 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
4539 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4540 IX86_ATTR_ISA ("aes", OPT_maes),
4541 IX86_ATTR_ISA ("sha", OPT_msha),
4542 IX86_ATTR_ISA ("avx", OPT_mavx),
4543 IX86_ATTR_ISA ("avx2", OPT_mavx2),
4544 IX86_ATTR_ISA ("avx512f", OPT_mavx512f),
4545 IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf),
4546 IX86_ATTR_ISA ("avx512er", OPT_mavx512er),
4547 IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd),
4548 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4549 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4550 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4551 IX86_ATTR_ISA ("sse", OPT_msse),
4552 IX86_ATTR_ISA ("sse2", OPT_msse2),
4553 IX86_ATTR_ISA ("sse3", OPT_msse3),
4554 IX86_ATTR_ISA ("sse4", OPT_msse4),
4555 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4556 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4557 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4558 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4559 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4560 IX86_ATTR_ISA ("fma", OPT_mfma),
4561 IX86_ATTR_ISA ("xop", OPT_mxop),
4562 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4563 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4564 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4565 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4566 IX86_ATTR_ISA ("rtm", OPT_mrtm),
4567 IX86_ATTR_ISA ("hle", OPT_mhle),
4568 IX86_ATTR_ISA ("prfchw", OPT_mprfchw),
4569 IX86_ATTR_ISA ("rdseed", OPT_mrdseed),
4570 IX86_ATTR_ISA ("adx", OPT_madx),
4571 IX86_ATTR_ISA ("fxsr", OPT_mfxsr),
4572 IX86_ATTR_ISA ("xsave", OPT_mxsave),
4573 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt),
4574 IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1),
4575 IX86_ATTR_ISA ("clflushopt", OPT_mclflushopt),
4576 IX86_ATTR_ISA ("xsavec", OPT_mxsavec),
4577 IX86_ATTR_ISA ("xsaves", OPT_mxsaves),
4579 /* enum options */
4580 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
4582 /* string options */
4583 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4584 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4586 /* flag options */
4587 IX86_ATTR_YES ("cld",
4588 OPT_mcld,
4589 MASK_CLD),
4591 IX86_ATTR_NO ("fancy-math-387",
4592 OPT_mfancy_math_387,
4593 MASK_NO_FANCY_MATH_387),
4595 IX86_ATTR_YES ("ieee-fp",
4596 OPT_mieee_fp,
4597 MASK_IEEE_FP),
4599 IX86_ATTR_YES ("inline-all-stringops",
4600 OPT_minline_all_stringops,
4601 MASK_INLINE_ALL_STRINGOPS),
4603 IX86_ATTR_YES ("inline-stringops-dynamically",
4604 OPT_minline_stringops_dynamically,
4605 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4607 IX86_ATTR_NO ("align-stringops",
4608 OPT_mno_align_stringops,
4609 MASK_NO_ALIGN_STRINGOPS),
4611 IX86_ATTR_YES ("recip",
4612 OPT_mrecip,
4613 MASK_RECIP),
4617 /* If this is a list, recurse to get the options. */
4618 if (TREE_CODE (args) == TREE_LIST)
4620 bool ret = true;
4622 for (; args; args = TREE_CHAIN (args))
4623 if (TREE_VALUE (args)
4624 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4625 p_strings, opts, opts_set,
4626 enum_opts_set))
4627 ret = false;
4629 return ret;
4632 else if (TREE_CODE (args) != STRING_CST)
4634 error ("attribute %<target%> argument not a string");
4635 return false;
4638 /* Handle multiple arguments separated by commas. */
4639 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4641 while (next_optstr && *next_optstr != '\0')
4643 char *p = next_optstr;
4644 char *orig_p = p;
4645 char *comma = strchr (next_optstr, ',');
4646 const char *opt_string;
4647 size_t len, opt_len;
4648 int opt;
4649 bool opt_set_p;
4650 char ch;
4651 unsigned i;
4652 enum ix86_opt_type type = ix86_opt_unknown;
4653 int mask = 0;
4655 if (comma)
4657 *comma = '\0';
4658 len = comma - next_optstr;
4659 next_optstr = comma + 1;
4661 else
4663 len = strlen (p);
4664 next_optstr = NULL;
4667 /* Recognize no-xxx. */
4668 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4670 opt_set_p = false;
4671 p += 3;
4672 len -= 3;
4674 else
4675 opt_set_p = true;
4677 /* Find the option. */
4678 ch = *p;
4679 opt = N_OPTS;
4680 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4682 type = attrs[i].type;
4683 opt_len = attrs[i].len;
4684 if (ch == attrs[i].string[0]
4685 && ((type != ix86_opt_str && type != ix86_opt_enum)
4686 ? len == opt_len
4687 : len > opt_len)
4688 && memcmp (p, attrs[i].string, opt_len) == 0)
4690 opt = attrs[i].opt;
4691 mask = attrs[i].mask;
4692 opt_string = attrs[i].string;
4693 break;
4697 /* Process the option. */
4698 if (opt == N_OPTS)
4700 error ("attribute(target(\"%s\")) is unknown", orig_p);
4701 ret = false;
4704 else if (type == ix86_opt_isa)
4706 struct cl_decoded_option decoded;
4708 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4709 ix86_handle_option (opts, opts_set,
4710 &decoded, input_location);
4713 else if (type == ix86_opt_yes || type == ix86_opt_no)
4715 if (type == ix86_opt_no)
4716 opt_set_p = !opt_set_p;
4718 if (opt_set_p)
4719 opts->x_target_flags |= mask;
4720 else
4721 opts->x_target_flags &= ~mask;
4724 else if (type == ix86_opt_str)
4726 if (p_strings[opt])
4728 error ("option(\"%s\") was already specified", opt_string);
4729 ret = false;
4731 else
4732 p_strings[opt] = xstrdup (p + opt_len);
4735 else if (type == ix86_opt_enum)
4737 bool arg_ok;
4738 int value;
4740 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
4741 if (arg_ok)
4742 set_option (opts, enum_opts_set, opt, value,
4743 p + opt_len, DK_UNSPECIFIED, input_location,
4744 global_dc);
4745 else
4747 error ("attribute(target(\"%s\")) is unknown", orig_p);
4748 ret = false;
4752 else
4753 gcc_unreachable ();
4756 return ret;
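/* Illustrative usage of the attribute string parsed above (an editorial
   example, not part of the original source):

     __attribute__((target ("sse4.2,no-avx")))
     int f (int x) { return x + 1; }

   The string is split at the comma, "sse4.2" is matched against the ISA
   table and handled as OPT_msse4_2, while the "no-" prefix on "avx"
   clears the corresponding ISA flag instead of setting it.  */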
4759 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4761 tree
4762 ix86_valid_target_attribute_tree (tree args,
4763 struct gcc_options *opts,
4764 struct gcc_options *opts_set)
4766 const char *orig_arch_string = opts->x_ix86_arch_string;
4767 const char *orig_tune_string = opts->x_ix86_tune_string;
4768 enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
4769 int orig_tune_defaulted = ix86_tune_defaulted;
4770 int orig_arch_specified = ix86_arch_specified;
4771 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
4772 tree t = NULL_TREE;
4773 int i;
4774 struct cl_target_option *def
4775 = TREE_TARGET_OPTION (target_option_default_node);
4776 struct gcc_options enum_opts_set;
4778 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
4780 /* Process each of the options on the chain. */
4781 if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts,
4782 opts_set, &enum_opts_set))
4783 return error_mark_node;
4785 /* If the changed options are different from the default, rerun
4786 ix86_option_override_internal, and then save the options away.
4787 The string options are attribute options, and will be undone
4788 when we copy the save structure. */
4789 if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags
4790 || opts->x_target_flags != def->x_target_flags
4791 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4792 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4793 || enum_opts_set.x_ix86_fpmath)
4795 /* If we are using the default tune= or arch=, undo the string assigned,
4796 and use the default. */
4797 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4798 opts->x_ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4799 else if (!orig_arch_specified)
4800 opts->x_ix86_arch_string = NULL;
4802 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4803 opts->x_ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4804 else if (orig_tune_defaulted)
4805 opts->x_ix86_tune_string = NULL;
4807 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4808 if (enum_opts_set.x_ix86_fpmath)
4809 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4810 else if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
4811 && TARGET_SSE_P (opts->x_ix86_isa_flags))
4813 opts->x_ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
4814 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4817 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4818 ix86_option_override_internal (false, opts, opts_set);
4820 /* Add any builtin functions with the new isa if any. */
4821 ix86_add_new_builtins (opts->x_ix86_isa_flags);
4823 /* Save the current options unless we are validating options for
4824 #pragma. */
4825 t = build_target_option_node (opts);
4827 opts->x_ix86_arch_string = orig_arch_string;
4828 opts->x_ix86_tune_string = orig_tune_string;
4829 opts_set->x_ix86_fpmath = orig_fpmath_set;
4831 /* Free up memory allocated to hold the strings */
4832 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4833 free (option_strings[i]);
4836 return t;
4839 /* Hook to validate attribute((target("string"))). */
4841 static bool
4842 ix86_valid_target_attribute_p (tree fndecl,
4843 tree ARG_UNUSED (name),
4844 tree args,
4845 int ARG_UNUSED (flags))
4847 struct gcc_options func_options;
4848 tree new_target, new_optimize;
4849 bool ret = true;
4851 /* attribute((target("default"))) does nothing, beyond
4852 affecting multi-versioning. */
4853 if (TREE_VALUE (args)
4854 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
4855 && TREE_CHAIN (args) == NULL_TREE
4856 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
4857 return true;
4859 tree old_optimize = build_optimization_node (&global_options);
4861 /* Get the optimization options of the current function. */
4862 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4864 if (!func_optimize)
4865 func_optimize = old_optimize;
4867 /* Init func_options. */
4868 memset (&func_options, 0, sizeof (func_options));
4869 init_options_struct (&func_options, NULL);
4870 lang_hooks.init_options_struct (&func_options);
4872 cl_optimization_restore (&func_options,
4873 TREE_OPTIMIZATION (func_optimize));
4875 /* Initialize func_options to the default before its target options can
4876 be set. */
4877 cl_target_option_restore (&func_options,
4878 TREE_TARGET_OPTION (target_option_default_node));
4880 new_target = ix86_valid_target_attribute_tree (args, &func_options,
4881 &global_options_set);
4883 new_optimize = build_optimization_node (&func_options);
4885 if (new_target == error_mark_node)
4886 ret = false;
4888 else if (fndecl && new_target)
4890 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4892 if (old_optimize != new_optimize)
4893 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4896 return ret;
4900 /* Hook to determine if one function can safely inline another. */
4902 static bool
4903 ix86_can_inline_p (tree caller, tree callee)
4905 bool ret = false;
4906 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4907 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4909 /* If callee has no option attributes, then it is ok to inline. */
4910 if (!callee_tree)
4911 ret = true;
4913 /* If caller has no option attributes, but callee does then it is not ok to
4914 inline. */
4915 else if (!caller_tree)
4916 ret = false;
4918 else
4920 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4921 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
4923 /* Callee's isa options should be a subset of the caller's, i.e. an SSE4 function
4924 can inline an SSE2 function but an SSE2 function can't inline an SSE4
4925 function. */
4926 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
4927 != callee_opts->x_ix86_isa_flags)
4928 ret = false;
4930 /* See if we have the same non-isa options. */
4931 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
4932 ret = false;
4934 /* See if arch, tune, etc. are the same. */
4935 else if (caller_opts->arch != callee_opts->arch)
4936 ret = false;
4938 else if (caller_opts->tune != callee_opts->tune)
4939 ret = false;
4941 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
4942 ret = false;
4944 else if (caller_opts->branch_cost != callee_opts->branch_cost)
4945 ret = false;
4947 else
4948 ret = true;
4951 return ret;
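/* A worked example of the rule above (editorial illustration): a caller
   compiled with -msse4.2 may inline a plain SSE2 callee, because the
   callee's ISA flags are a subset of the caller's; an SSE2 caller may not
   inline a callee declared __attribute__((target ("sse4.2"))), since the
   inlined body could then execute instructions the caller never enabled.  */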
4955 /* Remember the last target of ix86_set_current_function. */
4956 static GTY(()) tree ix86_previous_fndecl;
4958 /* Invalidate ix86_previous_fndecl cache. */
4959 void
4960 ix86_reset_previous_fndecl (void)
4962 ix86_previous_fndecl = NULL_TREE;
4965 /* Establish appropriate back-end context for processing the function
4966 FNDECL. The argument might be NULL to indicate processing at top
4967 level, outside of any function scope. */
4968 static void
4969 ix86_set_current_function (tree fndecl)
4971 /* Only change the context if the function changes. This hook is called
4972 several times in the course of compiling a function, and we don't want to
4973 slow things down too much or call target_reinit when it isn't safe. */
4974 if (fndecl && fndecl != ix86_previous_fndecl)
4976 tree old_tree = (ix86_previous_fndecl
4977 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
4978 : NULL_TREE);
4980 tree new_tree = (fndecl
4981 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
4982 : NULL_TREE);
4984 ix86_previous_fndecl = fndecl;
4985 if (old_tree == new_tree)
4988 else if (new_tree)
4990 cl_target_option_restore (&global_options,
4991 TREE_TARGET_OPTION (new_tree));
4992 if (TREE_TARGET_GLOBALS (new_tree))
4993 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
4994 else
4995 TREE_TARGET_GLOBALS (new_tree)
4996 = save_target_globals_default_opts ();
4999 else if (old_tree)
5001 new_tree = target_option_current_node;
5002 cl_target_option_restore (&global_options,
5003 TREE_TARGET_OPTION (new_tree));
5004 if (TREE_TARGET_GLOBALS (new_tree))
5005 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5006 else if (new_tree == target_option_default_node)
5007 restore_target_globals (&default_target_globals);
5008 else
5009 TREE_TARGET_GLOBALS (new_tree)
5010 = save_target_globals_default_opts ();
5016 /* Return true if this goes in large data/bss. */
5018 static bool
5019 ix86_in_large_data_p (tree exp)
5021 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
5022 return false;
5024 /* Functions are never large data. */
5025 if (TREE_CODE (exp) == FUNCTION_DECL)
5026 return false;
5028 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
5030 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
5031 if (strcmp (section, ".ldata") == 0
5032 || strcmp (section, ".lbss") == 0)
5033 return true;
5034 return false;
5036 else
5038 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
5040 /* If this is an incomplete type with size 0, then we can't put it
5041 in data because it might be too big when completed. */
5042 if (!size || size > ix86_section_threshold)
5043 return true;
5046 return false;
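/* Example of the effect (editorial illustration, assuming the default
   threshold): with -mcmodel=medium a definition such as

     static char big_buf[1 << 20];

   exceeds ix86_section_threshold (adjustable with -mlarge-data-threshold)
   and is therefore treated as large data, ending up in .lbss/.ldata
   rather than the ordinary .bss/.data sections.  */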
5049 /* Switch to the appropriate section for output of DECL.
5050 DECL is either a `VAR_DECL' node or a constant of some sort.
5051 RELOC indicates whether forming the initial value of DECL requires
5052 link-time relocations. */
5054 ATTRIBUTE_UNUSED static section *
5055 x86_64_elf_select_section (tree decl, int reloc,
5056 unsigned HOST_WIDE_INT align)
5058 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5059 && ix86_in_large_data_p (decl))
5061 const char *sname = NULL;
5062 unsigned int flags = SECTION_WRITE;
5063 switch (categorize_decl_for_section (decl, reloc))
5065 case SECCAT_DATA:
5066 sname = ".ldata";
5067 break;
5068 case SECCAT_DATA_REL:
5069 sname = ".ldata.rel";
5070 break;
5071 case SECCAT_DATA_REL_LOCAL:
5072 sname = ".ldata.rel.local";
5073 break;
5074 case SECCAT_DATA_REL_RO:
5075 sname = ".ldata.rel.ro";
5076 break;
5077 case SECCAT_DATA_REL_RO_LOCAL:
5078 sname = ".ldata.rel.ro.local";
5079 break;
5080 case SECCAT_BSS:
5081 sname = ".lbss";
5082 flags |= SECTION_BSS;
5083 break;
5084 case SECCAT_RODATA:
5085 case SECCAT_RODATA_MERGE_STR:
5086 case SECCAT_RODATA_MERGE_STR_INIT:
5087 case SECCAT_RODATA_MERGE_CONST:
5088 sname = ".lrodata";
5089 flags = 0;
5090 break;
5091 case SECCAT_SRODATA:
5092 case SECCAT_SDATA:
5093 case SECCAT_SBSS:
5094 gcc_unreachable ();
5095 case SECCAT_TEXT:
5096 case SECCAT_TDATA:
5097 case SECCAT_TBSS:
5098 /* We don't split these for medium model. Place them into
5099 default sections and hope for the best. */
5100 break;
5102 if (sname)
5104 /* We might get called with string constants, but get_named_section
5105 doesn't like them as they are not DECLs. Also, we need to set
5106 flags in that case. */
5107 if (!DECL_P (decl))
5108 return get_section (sname, flags, NULL);
5109 return get_named_section (decl, sname, reloc);
5112 return default_elf_select_section (decl, reloc, align);
5115 /* Select a set of attributes for section NAME based on the properties
5116 of DECL and whether or not RELOC indicates that DECL's initializer
5117 might contain runtime relocations. */
5119 static unsigned int ATTRIBUTE_UNUSED
5120 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
5122 unsigned int flags = default_section_type_flags (decl, name, reloc);
5124 if (decl == NULL_TREE
5125 && (strcmp (name, ".ldata.rel.ro") == 0
5126 || strcmp (name, ".ldata.rel.ro.local") == 0))
5127 flags |= SECTION_RELRO;
5129 if (strcmp (name, ".lbss") == 0
5130 || strncmp (name, ".lbss.", 6) == 0
5131 || strncmp (name, ".gnu.linkonce.lb.", 17) == 0
5132 flags |= SECTION_BSS;
5134 return flags;
5137 /* Build up a unique section name, expressed as a
5138 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
5139 RELOC indicates whether the initial value of EXP requires
5140 link-time relocations. */
5142 static void ATTRIBUTE_UNUSED
5143 x86_64_elf_unique_section (tree decl, int reloc)
5145 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5146 && ix86_in_large_data_p (decl))
5148 const char *prefix = NULL;
5149 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
5150 bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
5152 switch (categorize_decl_for_section (decl, reloc))
5154 case SECCAT_DATA:
5155 case SECCAT_DATA_REL:
5156 case SECCAT_DATA_REL_LOCAL:
5157 case SECCAT_DATA_REL_RO:
5158 case SECCAT_DATA_REL_RO_LOCAL:
5159 prefix = one_only ? ".ld" : ".ldata";
5160 break;
5161 case SECCAT_BSS:
5162 prefix = one_only ? ".lb" : ".lbss";
5163 break;
5164 case SECCAT_RODATA:
5165 case SECCAT_RODATA_MERGE_STR:
5166 case SECCAT_RODATA_MERGE_STR_INIT:
5167 case SECCAT_RODATA_MERGE_CONST:
5168 prefix = one_only ? ".lr" : ".lrodata";
5169 break;
5170 case SECCAT_SRODATA:
5171 case SECCAT_SDATA:
5172 case SECCAT_SBSS:
5173 gcc_unreachable ();
5174 case SECCAT_TEXT:
5175 case SECCAT_TDATA:
5176 case SECCAT_TBSS:
5177 /* We don't split these for medium model. Place them into
5178 default sections and hope for the best. */
5179 break;
5181 if (prefix)
5183 const char *name, *linkonce;
5184 char *string;
5186 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
5187 name = targetm.strip_name_encoding (name);
5189 /* If we're using one_only, then there needs to be a .gnu.linkonce
5190 prefix to the section name. */
5191 linkonce = one_only ? ".gnu.linkonce" : "";
5193 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
5195 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
5196 return;
5199 default_unique_section (decl, reloc);
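/* Example of the names produced above (editorial illustration): a large
   medium-model variable "tab" that needs a unique section (e.g. with
   -fdata-sections) gets ".ldata.tab", or ".gnu.linkonce.ld.tab" when it
   is one-only and COMDAT groups are not available.  */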
5202 #ifdef COMMON_ASM_OP
5203 /* This says how to output assembler code to declare an
5204 uninitialized external linkage data object.
5206 For medium model x86-64 we need to use .largecomm opcode for
5207 large objects. */
5208 void
5209 x86_elf_aligned_common (FILE *file,
5210 const char *name, unsigned HOST_WIDE_INT size,
5211 int align)
5213 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5214 && size > (unsigned int)ix86_section_threshold)
5215 fputs (".largecomm\t", file);
5216 else
5217 fputs (COMMON_ASM_OP, file);
5218 assemble_name (file, name);
5219 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
5220 size, align / BITS_PER_UNIT);
5222 #endif
5224 /* Utility function for targets to use in implementing
5225 ASM_OUTPUT_ALIGNED_BSS. */
5227 void
5228 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
5229 const char *name, unsigned HOST_WIDE_INT size,
5230 int align)
5232 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5233 && size > (unsigned int)ix86_section_threshold)
5234 switch_to_section (get_named_section (decl, ".lbss", 0));
5235 else
5236 switch_to_section (bss_section);
5237 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
5238 #ifdef ASM_DECLARE_OBJECT_NAME
5239 last_assemble_variable_decl = decl;
5240 ASM_DECLARE_OBJECT_NAME (file, name, decl);
5241 #else
5242 /* Standard thing is just output label for the object. */
5243 ASM_OUTPUT_LABEL (file, name);
5244 #endif /* ASM_DECLARE_OBJECT_NAME */
5245 ASM_OUTPUT_SKIP (file, size ? size : 1);
5248 /* Decide whether we must probe the stack before any space allocation
5249 on this target. It's essentially TARGET_STACK_PROBE except when
5250 -fstack-check causes the stack to be already probed differently. */
5252 bool
5253 ix86_target_stack_probe (void)
5255 /* Do not probe the stack twice if static stack checking is enabled. */
5256 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5257 return false;
5259 return TARGET_STACK_PROBE;
5262 /* Decide whether we can make a sibling call to a function. DECL is the
5263 declaration of the function being targeted by the call and EXP is the
5264 CALL_EXPR representing the call. */
5266 static bool
5267 ix86_function_ok_for_sibcall (tree decl, tree exp)
5269 tree type, decl_or_type;
5270 rtx a, b;
5272 /* If we are generating position-independent code, we cannot sibcall
5273 optimize any indirect call, or a direct call to a global function,
5274 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
5275 if (!TARGET_MACHO
5276 && !TARGET_64BIT
5277 && flag_pic
5278 && (!decl || !targetm.binds_local_p (decl)))
5279 return false;
5281 /* If we need to align the outgoing stack, then sibcalling would
5282 unalign the stack, which may break the called function. */
5283 if (ix86_minimum_incoming_stack_boundary (true)
5284 < PREFERRED_STACK_BOUNDARY)
5285 return false;
5287 if (decl)
5289 decl_or_type = decl;
5290 type = TREE_TYPE (decl);
5292 else
5294 /* We're looking at the CALL_EXPR, we need the type of the function. */
5295 type = CALL_EXPR_FN (exp); /* pointer expression */
5296 type = TREE_TYPE (type); /* pointer type */
5297 type = TREE_TYPE (type); /* function type */
5298 decl_or_type = type;
5301 /* Check that the return value locations are the same. Like
5302 if we are returning floats on the 80387 register stack, we cannot
5303 make a sibcall from a function that doesn't return a float to a
5304 function that does or, conversely, from a function that does return
5305 a float to a function that doesn't; the necessary stack adjustment
5306 would not be executed. This is also the place we notice
5307 differences in the return value ABI. Note that it is ok for one
5308 of the functions to have void return type as long as the return
5309 value of the other is passed in a register. */
5310 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
5311 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5312 cfun->decl, false);
5313 if (STACK_REG_P (a) || STACK_REG_P (b))
5315 if (!rtx_equal_p (a, b))
5316 return false;
5318 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5320 else if (!rtx_equal_p (a, b))
5321 return false;
5323 if (TARGET_64BIT)
5325 /* The SYSV ABI has more call-clobbered registers;
5326 disallow sibcalls from MS to SYSV. */
5327 if (cfun->machine->call_abi == MS_ABI
5328 && ix86_function_type_abi (type) == SYSV_ABI)
5329 return false;
5331 else
5333 /* If this call is indirect, we'll need to be able to use a
5334 call-clobbered register for the address of the target function.
5335 Make sure that all such registers are not used for passing
5336 parameters. Note that DLLIMPORT functions are indirect. */
5337 if (!decl
5338 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
5340 if (ix86_function_regparm (type, NULL) >= 3)
5342 /* ??? Need to count the actual number of registers to be used,
5343 not the possible number of registers. Fix later. */
5344 return false;
5349 /* Otherwise okay. That also includes certain types of indirect calls. */
5350 return true;
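/* Illustrative consequence of the checks above (editorial note): in
   32-bit PIC code a tail call to a global function is not turned into a
   sibcall, because the call goes through the PLT and the PLT entry
   expects %ebx to hold the GOT pointer, which a bare "jmp" at the end of
   the caller cannot guarantee.  */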
5353 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5354 and "sseregparm" calling convention attributes;
5355 arguments as in struct attribute_spec.handler. */
5357 static tree
5358 ix86_handle_cconv_attribute (tree *node, tree name,
5359 tree args,
5360 int flags ATTRIBUTE_UNUSED,
5361 bool *no_add_attrs)
5363 if (TREE_CODE (*node) != FUNCTION_TYPE
5364 && TREE_CODE (*node) != METHOD_TYPE
5365 && TREE_CODE (*node) != FIELD_DECL
5366 && TREE_CODE (*node) != TYPE_DECL)
5368 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5369 name);
5370 *no_add_attrs = true;
5371 return NULL_TREE;
5374 /* Can combine regparm with all attributes but fastcall, and thiscall. */
5375 if (is_attribute_p ("regparm", name))
5377 tree cst;
5379 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5381 error ("fastcall and regparm attributes are not compatible");
5384 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5386 error ("regparam and thiscall attributes are not compatible");
5389 cst = TREE_VALUE (args);
5390 if (TREE_CODE (cst) != INTEGER_CST)
5392 warning (OPT_Wattributes,
5393 "%qE attribute requires an integer constant argument",
5394 name);
5395 *no_add_attrs = true;
5397 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
5399 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
5400 name, REGPARM_MAX);
5401 *no_add_attrs = true;
5404 return NULL_TREE;
5407 if (TARGET_64BIT)
5409 /* Do not warn when emulating the MS ABI. */
5410 if ((TREE_CODE (*node) != FUNCTION_TYPE
5411 && TREE_CODE (*node) != METHOD_TYPE)
5412 || ix86_function_type_abi (*node) != MS_ABI)
5413 warning (OPT_Wattributes, "%qE attribute ignored",
5414 name);
5415 *no_add_attrs = true;
5416 return NULL_TREE;
5419 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5420 if (is_attribute_p ("fastcall", name))
5422 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5424 error ("fastcall and cdecl attributes are not compatible");
5426 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5428 error ("fastcall and stdcall attributes are not compatible");
5430 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
5432 error ("fastcall and regparm attributes are not compatible");
5434 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5436 error ("fastcall and thiscall attributes are not compatible");
5440 /* Can combine stdcall with fastcall (redundant), regparm and
5441 sseregparm. */
5442 else if (is_attribute_p ("stdcall", name))
5444 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5446 error ("stdcall and cdecl attributes are not compatible");
5448 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5450 error ("stdcall and fastcall attributes are not compatible");
5452 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5454 error ("stdcall and thiscall attributes are not compatible");
5458 /* Can combine cdecl with regparm and sseregparm. */
5459 else if (is_attribute_p ("cdecl", name))
5461 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5463 error ("stdcall and cdecl attributes are not compatible");
5465 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5467 error ("fastcall and cdecl attributes are not compatible");
5469 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5471 error ("cdecl and thiscall attributes are not compatible");
5474 else if (is_attribute_p ("thiscall", name))
5476 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
5477 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
5478 name);
5479 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5481 error ("stdcall and thiscall attributes are not compatible");
5483 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5485 error ("fastcall and thiscall attributes are not compatible");
5487 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5489 error ("cdecl and thiscall attributes are not compatible");
5493 /* Can combine sseregparm with all attributes. */
5495 return NULL_TREE;
5498 /* The transactional memory builtins are implicitly regparm or fastcall
5499 depending on the ABI. Override the generic do-nothing attribute that
5500 these builtins were declared with, and replace it with one of the two
5501 attributes that we expect elsewhere. */
5503 static tree
5504 ix86_handle_tm_regparm_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
5505 tree args ATTRIBUTE_UNUSED,
5506 int flags, bool *no_add_attrs)
5508 tree alt;
5510 /* In no case do we want to add the placeholder attribute. */
5511 *no_add_attrs = true;
5513 /* The 64-bit ABI is unchanged for transactional memory. */
5514 if (TARGET_64BIT)
5515 return NULL_TREE;
5517 /* ??? Is there a better way to validate 32-bit windows? We have
5518 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
5519 if (CHECK_STACK_LIMIT > 0)
5520 alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
5521 else
5523 alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
5524 alt = tree_cons (get_identifier ("regparm"), alt, NULL);
5526 decl_attributes (node, alt, flags);
5528 return NULL_TREE;
5531 /* This function determines from TYPE the calling-convention. */
5533 unsigned int
5534 ix86_get_callcvt (const_tree type)
5536 unsigned int ret = 0;
5537 bool is_stdarg;
5538 tree attrs;
5540 if (TARGET_64BIT)
5541 return IX86_CALLCVT_CDECL;
5543 attrs = TYPE_ATTRIBUTES (type);
5544 if (attrs != NULL_TREE)
5546 if (lookup_attribute ("cdecl", attrs))
5547 ret |= IX86_CALLCVT_CDECL;
5548 else if (lookup_attribute ("stdcall", attrs))
5549 ret |= IX86_CALLCVT_STDCALL;
5550 else if (lookup_attribute ("fastcall", attrs))
5551 ret |= IX86_CALLCVT_FASTCALL;
5552 else if (lookup_attribute ("thiscall", attrs))
5553 ret |= IX86_CALLCVT_THISCALL;
5555 /* Regparm isn't allowed for thiscall and fastcall. */
5556 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
5558 if (lookup_attribute ("regparm", attrs))
5559 ret |= IX86_CALLCVT_REGPARM;
5560 if (lookup_attribute ("sseregparm", attrs))
5561 ret |= IX86_CALLCVT_SSEREGPARM;
5564 if (IX86_BASE_CALLCVT(ret) != 0)
5565 return ret;
5568 is_stdarg = stdarg_p (type);
5569 if (TARGET_RTD && !is_stdarg)
5570 return IX86_CALLCVT_STDCALL | ret;
5572 if (ret != 0
5573 || is_stdarg
5574 || TREE_CODE (type) != METHOD_TYPE
5575 || ix86_function_type_abi (type) != MS_ABI)
5576 return IX86_CALLCVT_CDECL | ret;
5578 return IX86_CALLCVT_THISCALL;
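/* Example of the mapping (editorial illustration): on ia32,

     void __attribute__((fastcall)) f (int a, int b, int c);

   yields IX86_CALLCVT_FASTCALL, so A and B are passed in %ecx and %edx
   and C on the stack; with no attribute, no -mrtd and a non-method type
   the result is plain IX86_CALLCVT_CDECL.  */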
5581 /* Return 0 if the attributes for two types are incompatible, 1 if they
5582 are compatible, and 2 if they are nearly compatible (which causes a
5583 warning to be generated). */
5585 static int
5586 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5588 unsigned int ccvt1, ccvt2;
5590 if (TREE_CODE (type1) != FUNCTION_TYPE
5591 && TREE_CODE (type1) != METHOD_TYPE)
5592 return 1;
5594 ccvt1 = ix86_get_callcvt (type1);
5595 ccvt2 = ix86_get_callcvt (type2);
5596 if (ccvt1 != ccvt2)
5597 return 0;
5598 if (ix86_function_regparm (type1, NULL)
5599 != ix86_function_regparm (type2, NULL))
5600 return 0;
5602 return 1;
5605 /* Return the regparm value for a function with the indicated TYPE and DECL.
5606 DECL may be NULL when calling function indirectly
5607 or considering a libcall. */
5609 static int
5610 ix86_function_regparm (const_tree type, const_tree decl)
5612 tree attr;
5613 int regparm;
5614 unsigned int ccvt;
5616 if (TARGET_64BIT)
5617 return (ix86_function_type_abi (type) == SYSV_ABI
5618 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5619 ccvt = ix86_get_callcvt (type);
5620 regparm = ix86_regparm;
5622 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5624 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5625 if (attr)
5627 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5628 return regparm;
5631 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5632 return 2;
5633 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5634 return 1;
5636 /* Use register calling convention for local functions when possible. */
5637 if (decl
5638 && TREE_CODE (decl) == FUNCTION_DECL
5639 /* Caller and callee must agree on the calling convention, so
5640 checking just the optimize flag here would mean that with
5641 __attribute__((optimize (...))) the caller could use the regparm convention
5642 and the callee not, or vice versa. Instead look at whether the callee
5643 is optimized or not. */
5644 && opt_for_fn (decl, optimize)
5645 && !(profile_flag && !flag_fentry))
5647 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5648 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
5649 if (i && i->local && i->can_change_signature)
5651 int local_regparm, globals = 0, regno;
5653 /* Make sure no regparm register is taken by a
5654 fixed register variable. */
5655 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
5656 if (fixed_regs[local_regparm])
5657 break;
5659 /* We don't want to use regparm(3) for nested functions as
5660 these use a static chain pointer in the third argument. */
5661 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
5662 local_regparm = 2;
5664 /* In 32-bit mode save a register for the split stack. */
5665 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
5666 local_regparm = 2;
5668 /* Each fixed register usage increases register pressure,
5669 so fewer registers should be used for argument passing.
5670 This functionality can be overridden by an explicit
5671 regparm value. */
5672 for (regno = AX_REG; regno <= DI_REG; regno++)
5673 if (fixed_regs[regno])
5674 globals++;
5676 local_regparm
5677 = globals < local_regparm ? local_regparm - globals : 0;
5679 if (local_regparm > regparm)
5680 regparm = local_regparm;
5684 return regparm;
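/* Illustrative example (editorial, not from the original source): for

     int __attribute__((regparm (3))) add3 (int a, int b, int c);

   this function returns 3 on ia32, and A, B and C are then passed in
   %eax, %edx and %ecx instead of being pushed on the stack.  */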
5687 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5688 DFmode (2) arguments in SSE registers for a function with the
5689 indicated TYPE and DECL. DECL may be NULL when calling function
5690 indirectly or considering a libcall. Otherwise return 0. */
5692 static int
5693 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5695 gcc_assert (!TARGET_64BIT);
5697 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5698 by the sseregparm attribute. */
5699 if (TARGET_SSEREGPARM
5700 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5702 if (!TARGET_SSE)
5704 if (warn)
5706 if (decl)
5707 error ("calling %qD with attribute sseregparm without "
5708 "SSE/SSE2 enabled", decl);
5709 else
5710 error ("calling %qT with attribute sseregparm without "
5711 "SSE/SSE2 enabled", type);
5713 return 0;
5716 return 2;
5719 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5720 (and DFmode for SSE2) arguments in SSE registers. */
5721 if (decl && TARGET_SSE_MATH && optimize
5722 && !(profile_flag && !flag_fentry))
5724 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5725 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
5726 if (i && i->local && i->can_change_signature)
5727 return TARGET_SSE2 ? 2 : 1;
5730 return 0;
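/* Illustrative example (editorial): on ia32 with -msse2,

     double __attribute__((sseregparm)) scale (double x, double y);

   makes this function return 2, so the SFmode/DFmode arguments X and Y
   are passed in SSE registers (%xmm0, %xmm1) rather than on the stack.  */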
5733 /* Return true if EAX is live at the start of the function. Used by
5734 ix86_expand_prologue to determine if we need special help before
5735 calling allocate_stack_worker. */
5737 static bool
5738 ix86_eax_live_at_start_p (void)
5740 /* Cheat. Don't bother working forward from ix86_function_regparm
5741 to the function type to whether an actual argument is located in
5742 eax. Instead just look at cfg info, which is still close enough
5743 to correct at this point. This gives false positives for broken
5744 functions that might use uninitialized data that happens to be
5745 allocated in eax, but who cares? */
5746 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
5749 static bool
5750 ix86_keep_aggregate_return_pointer (tree fntype)
5752 tree attr;
5754 if (!TARGET_64BIT)
5756 attr = lookup_attribute ("callee_pop_aggregate_return",
5757 TYPE_ATTRIBUTES (fntype));
5758 if (attr)
5759 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
5761 /* For 32-bit MS-ABI the default is to keep aggregate
5762 return pointer. */
5763 if (ix86_function_type_abi (fntype) == MS_ABI)
5764 return true;
5766 return KEEP_AGGREGATE_RETURN_POINTER != 0;
5769 /* Value is the number of bytes of arguments automatically
5770 popped when returning from a subroutine call.
5771 FUNDECL is the declaration node of the function (as a tree),
5772 FUNTYPE is the data type of the function (as a tree),
5773 or for a library call it is an identifier node for the subroutine name.
5774 SIZE is the number of bytes of arguments passed on the stack.
5776 On the 80386, the RTD insn may be used to pop them if the number
5777 of args is fixed, but if the number is variable then the caller
5778 must pop them all. RTD can't be used for library calls now
5779 because the library is compiled with the Unix compiler.
5780 Use of RTD is a selectable option, since it is incompatible with
5781 standard Unix calling sequences. If the option is not selected,
5782 the caller must always pop the args.
5784 The attribute stdcall is equivalent to RTD on a per module basis. */
5786 static int
5787 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5789 unsigned int ccvt;
5791 /* None of the 64-bit ABIs pop arguments. */
5792 if (TARGET_64BIT)
5793 return 0;
5795 ccvt = ix86_get_callcvt (funtype);
5797 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
5798 | IX86_CALLCVT_THISCALL)) != 0
5799 && ! stdarg_p (funtype))
5800 return size;
5802 /* Lose any fake structure return argument if it is passed on the stack. */
5803 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5804 && !ix86_keep_aggregate_return_pointer (funtype))
5806 int nregs = ix86_function_regparm (funtype, fundecl);
5807 if (nregs == 0)
5808 return GET_MODE_SIZE (Pmode);
5811 return 0;
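/* Worked example (editorial illustration): for

     void __attribute__((stdcall)) f (int a, int b);

   SIZE is 8 and the callee pops its own arguments, returning with
   "ret $8"; a varargs or plain cdecl function returns 0 here and the
   caller adjusts %esp after the call instead.  */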
5814 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
5816 static bool
5817 ix86_legitimate_combined_insn (rtx insn)
5819 /* Check operand constraints in case hard registers were propagated
5820 into insn pattern. This check prevents combine pass from
5821 generating insn patterns with invalid hard register operands.
5822 These invalid insns can eventually confuse reload to error out
5823 with a spill failure. See also PRs 46829 and 46843. */
5824 if ((INSN_CODE (insn) = recog (PATTERN (insn), insn, 0)) >= 0)
5826 int i;
5828 extract_insn (insn);
5829 preprocess_constraints ();
5831 for (i = 0; i < recog_data.n_operands; i++)
5833 rtx op = recog_data.operand[i];
5834 enum machine_mode mode = GET_MODE (op);
5835 struct operand_alternative *op_alt;
5836 int offset = 0;
5837 bool win;
5838 int j;
5840 /* For pre-AVX disallow unaligned loads/stores where the
5841 instructions don't support it. */
5842 if (!TARGET_AVX
5843 && VECTOR_MODE_P (GET_MODE (op))
5844 && misaligned_operand (op, GET_MODE (op)))
5846 int min_align = get_attr_ssememalign (insn);
5847 if (min_align == 0)
5848 return false;
5851 /* A unary operator may be accepted by the predicate, but it
5852 is irrelevant for matching constraints. */
5853 if (UNARY_P (op))
5854 op = XEXP (op, 0);
5856 if (GET_CODE (op) == SUBREG)
5858 if (REG_P (SUBREG_REG (op))
5859 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
5860 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
5861 GET_MODE (SUBREG_REG (op)),
5862 SUBREG_BYTE (op),
5863 GET_MODE (op));
5864 op = SUBREG_REG (op);
5867 if (!(REG_P (op) && HARD_REGISTER_P (op)))
5868 continue;
5870 op_alt = recog_op_alt[i];
5872 /* Operand has no constraints, anything is OK. */
5873 win = !recog_data.n_alternatives;
5875 for (j = 0; j < recog_data.n_alternatives; j++)
5877 if (op_alt[j].anything_ok
5878 || (op_alt[j].matches != -1
5879 && operands_match_p
5880 (recog_data.operand[i],
5881 recog_data.operand[op_alt[j].matches]))
5882 || reg_fits_class_p (op, op_alt[j].cl, offset, mode))
5884 win = true;
5885 break;
5889 if (!win)
5890 return false;
5894 return true;
5897 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
5899 static unsigned HOST_WIDE_INT
5900 ix86_asan_shadow_offset (void)
5902 return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
5903 : HOST_WIDE_INT_C (0x7fff8000))
5904 : (HOST_WIDE_INT_1 << 29);
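/* For reference (editorial sketch of how the value is used, assuming the
   usual libsanitizer mapping): AddressSanitizer computes

     shadow = (addr >> 3) + ix86_asan_shadow_offset ()

   so the 0x7fff8000 constant above matches the shadow offset libasan
   expects on x86-64, and 1 << 29 the one used for 32-bit targets.  */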
5907 /* Argument support functions. */
5909 /* Return true when register may be used to pass function parameters. */
5910 bool
5911 ix86_function_arg_regno_p (int regno)
5913 int i;
5914 const int *parm_regs;
5916 if (!TARGET_64BIT)
5918 if (TARGET_MACHO)
5919 return (regno < REGPARM_MAX
5920 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5921 else
5922 return (regno < REGPARM_MAX
5923 || (TARGET_MMX && MMX_REGNO_P (regno)
5924 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5925 || (TARGET_SSE && SSE_REGNO_P (regno)
5926 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5929 if (TARGET_SSE && SSE_REGNO_P (regno)
5930 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
5931 return true;
5933 /* TODO: The function should depend on current function ABI but
5934 builtins.c would need updating then. Therefore we use the
5935 default ABI. */
5937 /* RAX is used as hidden argument to va_arg functions. */
5938 if (ix86_abi == SYSV_ABI && regno == AX_REG)
5939 return true;
5941 if (ix86_abi == MS_ABI)
5942 parm_regs = x86_64_ms_abi_int_parameter_registers;
5943 else
5944 parm_regs = x86_64_int_parameter_registers;
5945 for (i = 0; i < (ix86_abi == MS_ABI
5946 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5947 if (regno == parm_regs[i])
5948 return true;
5949 return false;
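/* Example (editorial, 64-bit SYSV): the integer parameter registers
   checked above are %rdi, %rsi, %rdx, %rcx, %r8 and %r9, while the MS
   ABI only uses %rcx, %rdx, %r8 and %r9; %rax additionally returns true
   here because it carries the number of vector registers used by a
   varargs call.  */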
5952 /* Return if we do not know how to pass TYPE solely in registers. */
5954 static bool
5955 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
5957 if (must_pass_in_stack_var_size_or_pad (mode, type))
5958 return true;
5960 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5961 The layout_type routine is crafty and tries to trick us into passing
5962 currently unsupported vector types on the stack by using TImode. */
5963 return (!TARGET_64BIT && mode == TImode
5964 && type && TREE_CODE (type) != VECTOR_TYPE);
5967 /* It returns the size, in bytes, of the area reserved for arguments passed
5968 in registers for the function represented by FNDECL, depending on the ABI
5969 format used. */
5971 ix86_reg_parm_stack_space (const_tree fndecl)
5973 enum calling_abi call_abi = SYSV_ABI;
5974 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
5975 call_abi = ix86_function_abi (fndecl);
5976 else
5977 call_abi = ix86_function_type_abi (fndecl);
5978 if (TARGET_64BIT && call_abi == MS_ABI)
5979 return 32;
5980 return 0;
5983 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
5984 call abi used. */
5985 enum calling_abi
5986 ix86_function_type_abi (const_tree fntype)
5988 if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
5990 enum calling_abi abi = ix86_abi;
5991 if (abi == SYSV_ABI)
5993 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
5994 abi = MS_ABI;
5996 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
5997 abi = SYSV_ABI;
5998 return abi;
6000 return ix86_abi;
6003 /* We add this as a workaround in order to use libc_has_function
6004 hook in i386.md. */
6005 bool
6006 ix86_libc_has_function (enum function_class fn_class)
6008 return targetm.libc_has_function (fn_class);
6011 static bool
6012 ix86_function_ms_hook_prologue (const_tree fn)
6014 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
6016 if (decl_function_context (fn) != NULL_TREE)
6017 error_at (DECL_SOURCE_LOCATION (fn),
6018 "ms_hook_prologue is not compatible with nested function");
6019 else
6020 return true;
6022 return false;
6025 static enum calling_abi
6026 ix86_function_abi (const_tree fndecl)
6028 if (! fndecl)
6029 return ix86_abi;
6030 return ix86_function_type_abi (TREE_TYPE (fndecl));
6033 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
6034 call abi used. */
6035 enum calling_abi
6036 ix86_cfun_abi (void)
6038 if (! cfun)
6039 return ix86_abi;
6040 return cfun->machine->call_abi;
6043 /* Write the extra assembler code needed to declare a function properly. */
6045 void
6046 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
6047 tree decl)
6049 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
6051 if (is_ms_hook)
6053 int i, filler_count = (TARGET_64BIT ? 32 : 16);
6054 unsigned int filler_cc = 0xcccccccc;
6056 for (i = 0; i < filler_count; i += 4)
6057 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
6060 #ifdef SUBTARGET_ASM_UNWIND_INIT
6061 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
6062 #endif
6064 ASM_OUTPUT_LABEL (asm_out_file, fname);
6066 /* Output magic byte marker, if hot-patch attribute is set. */
6067 if (is_ms_hook)
6069 if (TARGET_64BIT)
6071 /* leaq [%rsp + 0], %rsp */
6072 asm_fprintf (asm_out_file, ASM_BYTE
6073 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
6075 else
6077 /* movl.s %edi, %edi
6078 push %ebp
6079 movl.s %esp, %ebp */
6080 asm_fprintf (asm_out_file, ASM_BYTE
6081 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
6086 /* regclass.c */
6087 extern void init_regs (void);
6089 /* Implementation of call abi switching target hook. Specific to FNDECL
6090 the specific call register sets are set. See also
6091 ix86_conditional_register_usage for more details. */
6092 void
6093 ix86_call_abi_override (const_tree fndecl)
6095 if (fndecl == NULL_TREE)
6096 cfun->machine->call_abi = ix86_abi;
6097 else
6098 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
6101 /* 64-bit MS and SYSV ABI have different sets of call used registers. Avoid
6102 expensive re-initialization of init_regs each time we switch function context
6103 since this is needed only during RTL expansion. */
6104 static void
6105 ix86_maybe_switch_abi (void)
6107 if (TARGET_64BIT &&
6108 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
6109 reinit_regs ();
6112 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6113 for a call to a function whose data type is FNTYPE.
6114 For a library call, FNTYPE is 0. */
6116 void
6117 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
6118 tree fntype, /* tree ptr for function decl */
6119 rtx libname, /* SYMBOL_REF of library name or 0 */
6120 tree fndecl,
6121 int caller)
6123 struct cgraph_local_info *i;
6125 memset (cum, 0, sizeof (*cum));
6127 if (fndecl)
6129 i = cgraph_local_info (fndecl);
6130 cum->call_abi = ix86_function_abi (fndecl);
6132 else
6134 i = NULL;
6135 cum->call_abi = ix86_function_type_abi (fntype);
6138 cum->caller = caller;
6140 /* Set up the number of registers to use for passing arguments. */
6141 cum->nregs = ix86_regparm;
6142 if (TARGET_64BIT)
6144 cum->nregs = (cum->call_abi == SYSV_ABI
6145 ? X86_64_REGPARM_MAX
6146 : X86_64_MS_REGPARM_MAX);
6148 if (TARGET_SSE)
6150 cum->sse_nregs = SSE_REGPARM_MAX;
6151 if (TARGET_64BIT)
6153 cum->sse_nregs = (cum->call_abi == SYSV_ABI
6154 ? X86_64_SSE_REGPARM_MAX
6155 : X86_64_MS_SSE_REGPARM_MAX);
6158 if (TARGET_MMX)
6159 cum->mmx_nregs = MMX_REGPARM_MAX;
6160 cum->warn_avx512f = true;
6161 cum->warn_avx = true;
6162 cum->warn_sse = true;
6163 cum->warn_mmx = true;
6165 /* Because the type might mismatch between the caller and callee, we need to
6166 use the actual function type for local calls.
6167 FIXME: cgraph_analyze can be told to actually record if function uses
6168 va_start so for local functions maybe_vaarg can be made aggressive
6169 helping K&R code.
6170 FIXME: once the type system is fixed, we won't need this code anymore. */
6171 if (i && i->local && i->can_change_signature)
6172 fntype = TREE_TYPE (fndecl);
6173 cum->maybe_vaarg = (fntype
6174 ? (!prototype_p (fntype) || stdarg_p (fntype))
6175 : !libname);
6177 if (!TARGET_64BIT)
6179 /* If there are variable arguments, then we won't pass anything
6180 in registers in 32-bit mode. */
6181 if (stdarg_p (fntype))
6183 cum->nregs = 0;
6184 cum->sse_nregs = 0;
6185 cum->mmx_nregs = 0;
6186 cum->warn_avx512f = false;
6187 cum->warn_avx = false;
6188 cum->warn_sse = false;
6189 cum->warn_mmx = false;
6190 return;
6193 /* Use ecx and edx registers if function has fastcall attribute,
6194 else look for regparm information. */
6195 if (fntype)
6197 unsigned int ccvt = ix86_get_callcvt (fntype);
6198 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
6200 cum->nregs = 1;
6201 cum->fastcall = 1; /* Same first register as in fastcall. */
6203 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
6205 cum->nregs = 2;
6206 cum->fastcall = 1;
6208 else
6209 cum->nregs = ix86_function_regparm (fntype, fndecl);
6212 /* Set up the number of SSE registers used for passing SFmode
6213 and DFmode arguments. Warn for mismatching ABI. */
6214 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
6218 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
6219 But in the case of vector types, it is some vector mode.
6221 When we have only some of our vector isa extensions enabled, then there
6222 are some modes for which vector_mode_supported_p is false. For these
6223 modes, the generic vector support in gcc will choose some non-vector mode
6224 in order to implement the type. By computing the natural mode, we'll
6225 select the proper ABI location for the operand and not depend on whatever
6226 the middle-end decides to do with these vector types.
6228 The middle-end can't deal with vector types > 16 bytes. In this
6229 case, we return the original mode and warn ABI change if CUM isn't
6230 NULL.
6232 If IN_RETURN is true, warn about the ABI change if the vector mode isn't
6233 available for function return value. */
6235 static enum machine_mode
6236 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
6237 bool in_return)
6239 enum machine_mode mode = TYPE_MODE (type);
6241 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
6243 HOST_WIDE_INT size = int_size_in_bytes (type);
6244 if ((size == 8 || size == 16 || size == 32 || size == 64)
6245 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
6246 && TYPE_VECTOR_SUBPARTS (type) > 1)
6248 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
6250 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6251 mode = MIN_MODE_VECTOR_FLOAT;
6252 else
6253 mode = MIN_MODE_VECTOR_INT;
6255 /* Get the mode which has this inner mode and number of units. */
6256 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
6257 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
6258 && GET_MODE_INNER (mode) == innermode)
6260 if (size == 64 && !TARGET_AVX512F)
6262 static bool warnedavx512f;
6263 static bool warnedavx512f_ret;
6265 if (cum && cum->warn_avx512f && !warnedavx512f)
6267 if (warning (OPT_Wpsabi, "AVX512F vector argument "
6268 "without AVX512F enabled changes the ABI"))
6269 warnedavx512f = true;
6271 else if (in_return && !warnedavx512f_ret)
6273 if (warning (OPT_Wpsabi, "AVX512F vector return "
6274 "without AVX512F enabled changes the ABI"))
6275 warnedavx512f_ret = true;
6278 return TYPE_MODE (type);
6280 else if (size == 32 && !TARGET_AVX)
6282 static bool warnedavx;
6283 static bool warnedavx_ret;
6285 if (cum && cum->warn_avx && !warnedavx)
6287 if (warning (OPT_Wpsabi, "AVX vector argument "
6288 "without AVX enabled changes the ABI"))
6289 warnedavx = true;
6291 else if (in_return && !warnedavx_ret)
6293 if (warning (OPT_Wpsabi, "AVX vector return "
6294 "without AVX enabled changes the ABI"))
6295 warnedavx_ret = true;
6298 return TYPE_MODE (type);
6300 else if (((size == 8 && TARGET_64BIT) || size == 16)
6301 && !TARGET_SSE)
6303 static bool warnedsse;
6304 static bool warnedsse_ret;
6306 if (cum && cum->warn_sse && !warnedsse)
6308 if (warning (OPT_Wpsabi, "SSE vector argument "
6309 "without SSE enabled changes the ABI"))
6310 warnedsse = true;
6312 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
6314 if (warning (OPT_Wpsabi, "SSE vector return "
6315 "without SSE enabled changes the ABI"))
6316 warnedsse_ret = true;
6319 else if ((size == 8 && !TARGET_64BIT) && !TARGET_MMX)
6321 static bool warnedmmx;
6322 static bool warnedmmx_ret;
6324 if (cum && cum->warn_mmx && !warnedmmx)
6326 if (warning (OPT_Wpsabi, "MMX vector argument "
6327 "without MMX enabled changes the ABI"))
6328 warnedmmx = true;
6330 else if (in_return && !warnedmmx_ret)
6332 if (warning (OPT_Wpsabi, "MMX vector return "
6333 "without MMX enabled changes the ABI"))
6334 warnedmmx_ret = true;
6337 return mode;
6340 gcc_unreachable ();
6344 return mode;
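/* Illustrative example (editorial): for

     typedef float v8sf __attribute__((vector_size (32)));

   this returns V8SFmode when AVX is enabled; without -mavx the function
   emits the -Wpsabi warning above and falls back to TYPE_MODE, so the
   value is no longer passed in a %ymm register.  */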
6347 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
6348 this may not agree with the mode that the type system has chosen for the
6349 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
6350 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
6352 static rtx
6353 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
6354 unsigned int regno)
6356 rtx tmp;
6358 if (orig_mode != BLKmode)
6359 tmp = gen_rtx_REG (orig_mode, regno);
6360 else
6362 tmp = gen_rtx_REG (mode, regno);
6363 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
6364 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
6367 return tmp;
6370 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
6371 of this code is to classify each 8bytes of incoming argument by the register
6372 class and assign registers accordingly. */
6374 /* Return the union class of CLASS1 and CLASS2.
6375 See the x86-64 PS ABI for details. */
6377 static enum x86_64_reg_class
6378 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
6380 /* Rule #1: If both classes are equal, this is the resulting class. */
6381 if (class1 == class2)
6382 return class1;
6384 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
6385 the other class. */
6386 if (class1 == X86_64_NO_CLASS)
6387 return class2;
6388 if (class2 == X86_64_NO_CLASS)
6389 return class1;
6391 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
6392 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
6393 return X86_64_MEMORY_CLASS;
6395 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
6396 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
6397 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
6398 return X86_64_INTEGERSI_CLASS;
6399 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
6400 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
6401 return X86_64_INTEGER_CLASS;
6403 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
6404 MEMORY is used. */
6405 if (class1 == X86_64_X87_CLASS
6406 || class1 == X86_64_X87UP_CLASS
6407 || class1 == X86_64_COMPLEX_X87_CLASS
6408 || class2 == X86_64_X87_CLASS
6409 || class2 == X86_64_X87UP_CLASS
6410 || class2 == X86_64_COMPLEX_X87_CLASS)
6411 return X86_64_MEMORY_CLASS;
6413 /* Rule #6: Otherwise class SSE is used. */
6414 return X86_64_SSE_CLASS;
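/* Worked example of the merge rules (editorial illustration): in

     struct s { int i; float f; };

   both fields land in the same eightbyte; the int contributes an integer
   class and the float an SSE class, so rule #4 merges the eightbyte to
   INTEGER and the whole struct is passed in one general-purpose
   register.  */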
6417 /* Classify the argument of type TYPE and mode MODE.
6418 CLASSES will be filled by the register class used to pass each word
6419 of the operand. The number of words is returned. In case the parameter
6420 should be passed in memory, 0 is returned. As a special case for zero
6421 sized containers, classes[0] will be NO_CLASS and 1 is returned.
6423 BIT_OFFSET is used internally for handling records and specifies the
6424 offset in bits, modulo 512, to avoid overflow cases.
6426 See the x86-64 PS ABI for details.
6429 static int
6430 classify_argument (enum machine_mode mode, const_tree type,
6431 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
6433 HOST_WIDE_INT bytes =
6434 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6435 int words
6436 = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6438 /* Variable sized entities are always passed/returned in memory. */
6439 if (bytes < 0)
6440 return 0;
6442 if (mode != VOIDmode
6443 && targetm.calls.must_pass_in_stack (mode, type))
6444 return 0;
6446 if (type && AGGREGATE_TYPE_P (type))
6448 int i;
6449 tree field;
6450 enum x86_64_reg_class subclasses[MAX_CLASSES];
6452 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
6453 if (bytes > 64)
6454 return 0;
6456 for (i = 0; i < words; i++)
6457 classes[i] = X86_64_NO_CLASS;
6459 /* Zero-sized arrays or structures are NO_CLASS. We return 0 to
6460 signal the memory class, so handle this as a special case. */
6461 if (!words)
6463 classes[0] = X86_64_NO_CLASS;
6464 return 1;
6467 /* Classify each field of record and merge classes. */
6468 switch (TREE_CODE (type))
6470 case RECORD_TYPE:
6471 /* And now merge the fields of the structure. */
6472 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6474 if (TREE_CODE (field) == FIELD_DECL)
6476 int num;
6478 if (TREE_TYPE (field) == error_mark_node)
6479 continue;
6481 /* Bitfields are always classified as integer. Handle them
6482 early, since later code would consider them to be
6483 misaligned integers. */
6484 if (DECL_BIT_FIELD (field))
6486 for (i = (int_bit_position (field)
6487 + (bit_offset % 64)) / 8 / 8;
6488 i < ((int_bit_position (field) + (bit_offset % 64))
6489 + tree_to_shwi (DECL_SIZE (field))
6490 + 63) / 8 / 8; i++)
6491 classes[i] =
6492 merge_classes (X86_64_INTEGER_CLASS,
6493 classes[i]);
6495 else
6497 int pos;
6499 type = TREE_TYPE (field);
6501 /* Flexible array member is ignored. */
6502 if (TYPE_MODE (type) == BLKmode
6503 && TREE_CODE (type) == ARRAY_TYPE
6504 && TYPE_SIZE (type) == NULL_TREE
6505 && TYPE_DOMAIN (type) != NULL_TREE
6506 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
6507 == NULL_TREE))
6509 static bool warned;
6511 if (!warned && warn_psabi)
6513 warned = true;
6514 inform (input_location,
6515 "the ABI of passing struct with"
6516 " a flexible array member has"
6517 " changed in GCC 4.4");
6519 continue;
6521 num = classify_argument (TYPE_MODE (type), type,
6522 subclasses,
6523 (int_bit_position (field)
6524 + bit_offset) % 512);
6525 if (!num)
6526 return 0;
6527 pos = (int_bit_position (field)
6528 + (bit_offset % 64)) / 8 / 8;
6529 for (i = 0; i < num && (i + pos) < words; i++)
6530 classes[i + pos] =
6531 merge_classes (subclasses[i], classes[i + pos]);
6535 break;
6537 case ARRAY_TYPE:
6538 /* Arrays are handled as small records. */
6540 int num;
6541 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
6542 TREE_TYPE (type), subclasses, bit_offset);
6543 if (!num)
6544 return 0;
6546 /* The partial classes are now full classes. */
6547 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
6548 subclasses[0] = X86_64_SSE_CLASS;
6549 if (subclasses[0] == X86_64_INTEGERSI_CLASS
6550 && !((bit_offset % 64) == 0 && bytes == 4))
6551 subclasses[0] = X86_64_INTEGER_CLASS;
6553 for (i = 0; i < words; i++)
6554 classes[i] = subclasses[i % num];
6556 break;
6558 case UNION_TYPE:
6559 case QUAL_UNION_TYPE:
6560 /* Unions are similar to RECORD_TYPE but the offset is always 0. */
6562 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6564 if (TREE_CODE (field) == FIELD_DECL)
6566 int num;
6568 if (TREE_TYPE (field) == error_mark_node)
6569 continue;
6571 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
6572 TREE_TYPE (field), subclasses,
6573 bit_offset);
6574 if (!num)
6575 return 0;
6576 for (i = 0; i < num; i++)
6577 classes[i] = merge_classes (subclasses[i], classes[i]);
6580 break;
6582 default:
6583 gcc_unreachable ();
6586 if (words > 2)
6588 /* When the size exceeds 16 bytes, if the first eightbyte isn't
6589 X86_64_SSE_CLASS or any of the remaining ones isn't
6590 X86_64_SSEUP_CLASS, everything should be passed in
6591 memory. */
6592 if (classes[0] != X86_64_SSE_CLASS)
6593 return 0;
6595 for (i = 1; i < words; i++)
6596 if (classes[i] != X86_64_SSEUP_CLASS)
6597 return 0;
6600 /* Final merger cleanup. */
6601 for (i = 0; i < words; i++)
6603 /* If one class is MEMORY, everything should be passed in
6604 memory. */
6605 if (classes[i] == X86_64_MEMORY_CLASS)
6606 return 0;
6608 /* X86_64_SSEUP_CLASS should always be preceded by
6609 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6610 if (classes[i] == X86_64_SSEUP_CLASS
6611 && classes[i - 1] != X86_64_SSE_CLASS
6612 && classes[i - 1] != X86_64_SSEUP_CLASS)
6614 /* The first one should never be X86_64_SSEUP_CLASS. */
6615 gcc_assert (i != 0);
6616 classes[i] = X86_64_SSE_CLASS;
6619 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6620 everything should be passed in memory. */
6621 if (classes[i] == X86_64_X87UP_CLASS
6622 && (classes[i - 1] != X86_64_X87_CLASS))
6624 static bool warned;
6626 /* The first one should never be X86_64_X87UP_CLASS. */
6627 gcc_assert (i != 0);
6628 if (!warned && warn_psabi)
6630 warned = true;
6631 inform (input_location,
6632 "the ABI of passing union with long double"
6633 " has changed in GCC 4.4");
6635 return 0;
6638 return words;
6641 /* Compute the alignment needed. We align all types to their natural boundaries,
6642 with the exception of XFmode, which is aligned to 64 bits. */
6643 if (mode != VOIDmode && mode != BLKmode)
6645 int mode_alignment = GET_MODE_BITSIZE (mode);
6647 if (mode == XFmode)
6648 mode_alignment = 128;
6649 else if (mode == XCmode)
6650 mode_alignment = 256;
6651 if (COMPLEX_MODE_P (mode))
6652 mode_alignment /= 2;
6653 /* Misaligned fields are always returned in memory. */
6654 if (bit_offset % mode_alignment)
6655 return 0;
6658 /* for V1xx modes, just use the base mode */
6659 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
6660 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
6661 mode = GET_MODE_INNER (mode);
6663 /* Classification of atomic types. */
6664 switch (mode)
6666 case SDmode:
6667 case DDmode:
6668 classes[0] = X86_64_SSE_CLASS;
6669 return 1;
6670 case TDmode:
6671 classes[0] = X86_64_SSE_CLASS;
6672 classes[1] = X86_64_SSEUP_CLASS;
6673 return 2;
6674 case DImode:
6675 case SImode:
6676 case HImode:
6677 case QImode:
6678 case CSImode:
6679 case CHImode:
6680 case CQImode:
6682 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
6684 /* Analyze last 128 bits only. */
6685 size = (size - 1) & 0x7f;
6687 if (size < 32)
6689 classes[0] = X86_64_INTEGERSI_CLASS;
6690 return 1;
6692 else if (size < 64)
6694 classes[0] = X86_64_INTEGER_CLASS;
6695 return 1;
6697 else if (size < 64+32)
6699 classes[0] = X86_64_INTEGER_CLASS;
6700 classes[1] = X86_64_INTEGERSI_CLASS;
6701 return 2;
6703 else if (size < 64+64)
6705 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6706 return 2;
6708 else
6709 gcc_unreachable ();
6711 case CDImode:
6712 case TImode:
6713 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6714 return 2;
6715 case COImode:
6716 case OImode:
6717 /* OImode shouldn't be used directly. */
6718 gcc_unreachable ();
6719 case CTImode:
6720 return 0;
6721 case SFmode:
6722 if (!(bit_offset % 64))
6723 classes[0] = X86_64_SSESF_CLASS;
6724 else
6725 classes[0] = X86_64_SSE_CLASS;
6726 return 1;
6727 case DFmode:
6728 classes[0] = X86_64_SSEDF_CLASS;
6729 return 1;
6730 case XFmode:
6731 classes[0] = X86_64_X87_CLASS;
6732 classes[1] = X86_64_X87UP_CLASS;
6733 return 2;
6734 case TFmode:
6735 classes[0] = X86_64_SSE_CLASS;
6736 classes[1] = X86_64_SSEUP_CLASS;
6737 return 2;
6738 case SCmode:
6739 classes[0] = X86_64_SSE_CLASS;
6740 if (!(bit_offset % 64))
6741 return 1;
6742 else
6744 static bool warned;
6746 if (!warned && warn_psabi)
6748 warned = true;
6749 inform (input_location,
6750 "the ABI of passing structure with complex float"
6751 " member has changed in GCC 4.4");
6753 classes[1] = X86_64_SSESF_CLASS;
6754 return 2;
6756 case DCmode:
6757 classes[0] = X86_64_SSEDF_CLASS;
6758 classes[1] = X86_64_SSEDF_CLASS;
6759 return 2;
6760 case XCmode:
6761 classes[0] = X86_64_COMPLEX_X87_CLASS;
6762 return 1;
6763 case TCmode:
6764 /* This mode is larger than 16 bytes. */
6765 return 0;
6766 case V8SFmode:
6767 case V8SImode:
6768 case V32QImode:
6769 case V16HImode:
6770 case V4DFmode:
6771 case V4DImode:
6772 classes[0] = X86_64_SSE_CLASS;
6773 classes[1] = X86_64_SSEUP_CLASS;
6774 classes[2] = X86_64_SSEUP_CLASS;
6775 classes[3] = X86_64_SSEUP_CLASS;
6776 return 4;
6777 case V8DFmode:
6778 case V16SFmode:
6779 case V8DImode:
6780 case V16SImode:
6781 case V32HImode:
6782 case V64QImode:
6783 classes[0] = X86_64_SSE_CLASS;
6784 classes[1] = X86_64_SSEUP_CLASS;
6785 classes[2] = X86_64_SSEUP_CLASS;
6786 classes[3] = X86_64_SSEUP_CLASS;
6787 classes[4] = X86_64_SSEUP_CLASS;
6788 classes[5] = X86_64_SSEUP_CLASS;
6789 classes[6] = X86_64_SSEUP_CLASS;
6790 classes[7] = X86_64_SSEUP_CLASS;
6791 return 8;
6792 case V4SFmode:
6793 case V4SImode:
6794 case V16QImode:
6795 case V8HImode:
6796 case V2DFmode:
6797 case V2DImode:
6798 classes[0] = X86_64_SSE_CLASS;
6799 classes[1] = X86_64_SSEUP_CLASS;
6800 return 2;
6801 case V1TImode:
6802 case V1DImode:
6803 case V2SFmode:
6804 case V2SImode:
6805 case V4HImode:
6806 case V8QImode:
6807 classes[0] = X86_64_SSE_CLASS;
6808 return 1;
6809 case BLKmode:
6810 case VOIDmode:
6811 return 0;
6812 default:
6813 gcc_assert (VECTOR_MODE_P (mode));
6815 if (bytes > 16)
6816 return 0;
6818 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
6820 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
6821 classes[0] = X86_64_INTEGERSI_CLASS;
6822 else
6823 classes[0] = X86_64_INTEGER_CLASS;
6824 classes[1] = X86_64_INTEGER_CLASS;
6825 return 1 + (bytes > 8);
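/* Worked classification example (editorial, not part of GCC).  Under the
   rules implemented above, the 16-byte aggregate

     struct s { int i; double d; };

   yields classes[0] = X86_64_INTEGERSI_CLASS (the int plus padding) and
   classes[1] = X86_64_SSEDF_CLASS (the double), so classify_argument
   returns 2 and the value travels in one integer and one SSE register.
   A 24-byte struct of three doubles instead classifies its first eightbyte
   as SSEDF rather than SSE, so the words > 2 check above returns 0 and the
   value is passed in memory, matching the psABI rule for aggregates larger
   than 16 bytes that are not a single wide vector.  */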
6829 /* Examine the argument and set the number of registers required in each
6830 class. Return true iff the parameter should be passed in memory. */
6832 static bool
6833 examine_argument (enum machine_mode mode, const_tree type, int in_return,
6834 int *int_nregs, int *sse_nregs)
6836 enum x86_64_reg_class regclass[MAX_CLASSES];
6837 int n = classify_argument (mode, type, regclass, 0);
6839 *int_nregs = 0;
6840 *sse_nregs = 0;
6842 if (!n)
6843 return true;
6844 for (n--; n >= 0; n--)
6845 switch (regclass[n])
6847 case X86_64_INTEGER_CLASS:
6848 case X86_64_INTEGERSI_CLASS:
6849 (*int_nregs)++;
6850 break;
6851 case X86_64_SSE_CLASS:
6852 case X86_64_SSESF_CLASS:
6853 case X86_64_SSEDF_CLASS:
6854 (*sse_nregs)++;
6855 break;
6856 case X86_64_NO_CLASS:
6857 case X86_64_SSEUP_CLASS:
6858 break;
6859 case X86_64_X87_CLASS:
6860 case X86_64_X87UP_CLASS:
6861 case X86_64_COMPLEX_X87_CLASS:
6862 if (!in_return)
6863 return true;
6864 break;
6865 case X86_64_MEMORY_CLASS:
6866 gcc_unreachable ();
6869 return false;
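/* Editorial note: continuing the struct s example above, examine_argument
   reports *int_nregs = 1 and *sse_nregs = 1 and returns false (the value
   fits in registers); an aggregate that classify_argument rejects
   (returns 0) makes examine_argument return true, i.e. pass it in memory.  */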
6872 /* Construct container for the argument used by GCC interface. See
6873 FUNCTION_ARG for the detailed description. */
6875 static rtx
6876 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
6877 const_tree type, int in_return, int nintregs, int nsseregs,
6878 const int *intreg, int sse_regno)
6880 /* The following variables hold the static issued_error state. */
6881 static bool issued_sse_arg_error;
6882 static bool issued_sse_ret_error;
6883 static bool issued_x87_ret_error;
6885 enum machine_mode tmpmode;
6886 int bytes =
6887 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6888 enum x86_64_reg_class regclass[MAX_CLASSES];
6889 int n;
6890 int i;
6891 int nexps = 0;
6892 int needed_sseregs, needed_intregs;
6893 rtx exp[MAX_CLASSES];
6894 rtx ret;
6896 n = classify_argument (mode, type, regclass, 0);
6897 if (!n)
6898 return NULL;
6899 if (examine_argument (mode, type, in_return, &needed_intregs,
6900 &needed_sseregs))
6901 return NULL;
6902 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
6903 return NULL;
6905 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6906 some less clueful developer tries to use floating-point anyway. */
6907 if (needed_sseregs && !TARGET_SSE)
6909 if (in_return)
6911 if (!issued_sse_ret_error)
6913 error ("SSE register return with SSE disabled");
6914 issued_sse_ret_error = true;
6917 else if (!issued_sse_arg_error)
6919 error ("SSE register argument with SSE disabled");
6920 issued_sse_arg_error = true;
6922 return NULL;
6925 /* Likewise, error if the ABI requires us to return values in the
6926 x87 registers and the user specified -mno-80387. */
6927 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
6928 for (i = 0; i < n; i++)
6929 if (regclass[i] == X86_64_X87_CLASS
6930 || regclass[i] == X86_64_X87UP_CLASS
6931 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
6933 if (!issued_x87_ret_error)
6935 error ("x87 register return with x87 disabled");
6936 issued_x87_ret_error = true;
6938 return NULL;
6941 /* First construct simple cases. Avoid SCmode, since we want to use
6942 a single register to pass this type. */
6943 if (n == 1 && mode != SCmode)
6944 switch (regclass[0])
6946 case X86_64_INTEGER_CLASS:
6947 case X86_64_INTEGERSI_CLASS:
6948 return gen_rtx_REG (mode, intreg[0]);
6949 case X86_64_SSE_CLASS:
6950 case X86_64_SSESF_CLASS:
6951 case X86_64_SSEDF_CLASS:
6952 if (mode != BLKmode)
6953 return gen_reg_or_parallel (mode, orig_mode,
6954 SSE_REGNO (sse_regno));
6955 break;
6956 case X86_64_X87_CLASS:
6957 case X86_64_COMPLEX_X87_CLASS:
6958 return gen_rtx_REG (mode, FIRST_STACK_REG);
6959 case X86_64_NO_CLASS:
6960 /* Zero sized array, struct or class. */
6961 return NULL;
6962 default:
6963 gcc_unreachable ();
6965 if (n == 2
6966 && regclass[0] == X86_64_SSE_CLASS
6967 && regclass[1] == X86_64_SSEUP_CLASS
6968 && mode != BLKmode)
6969 return gen_reg_or_parallel (mode, orig_mode,
6970 SSE_REGNO (sse_regno));
6971 if (n == 4
6972 && regclass[0] == X86_64_SSE_CLASS
6973 && regclass[1] == X86_64_SSEUP_CLASS
6974 && regclass[2] == X86_64_SSEUP_CLASS
6975 && regclass[3] == X86_64_SSEUP_CLASS
6976 && mode != BLKmode)
6977 return gen_reg_or_parallel (mode, orig_mode,
6978 SSE_REGNO (sse_regno));
6979 if (n == 8
6980 && regclass[0] == X86_64_SSE_CLASS
6981 && regclass[1] == X86_64_SSEUP_CLASS
6982 && regclass[2] == X86_64_SSEUP_CLASS
6983 && regclass[3] == X86_64_SSEUP_CLASS
6984 && regclass[4] == X86_64_SSEUP_CLASS
6985 && regclass[5] == X86_64_SSEUP_CLASS
6986 && regclass[6] == X86_64_SSEUP_CLASS
6987 && regclass[7] == X86_64_SSEUP_CLASS
6988 && mode != BLKmode)
6989 return gen_reg_or_parallel (mode, orig_mode,
6990 SSE_REGNO (sse_regno));
6991 if (n == 2
6992 && regclass[0] == X86_64_X87_CLASS
6993 && regclass[1] == X86_64_X87UP_CLASS)
6994 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
6996 if (n == 2
6997 && regclass[0] == X86_64_INTEGER_CLASS
6998 && regclass[1] == X86_64_INTEGER_CLASS
6999 && (mode == CDImode || mode == TImode)
7000 && intreg[0] + 1 == intreg[1])
7001 return gen_rtx_REG (mode, intreg[0]);
7003 /* Otherwise figure out the entries of the PARALLEL. */
7004 for (i = 0; i < n; i++)
7006 int pos;
7008 switch (regclass[i])
7010 case X86_64_NO_CLASS:
7011 break;
7012 case X86_64_INTEGER_CLASS:
7013 case X86_64_INTEGERSI_CLASS:
7014 /* Merge TImodes on aligned occasions here too. */
7015 if (i * 8 + 8 > bytes)
7016 tmpmode
7017 = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
7018 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
7019 tmpmode = SImode;
7020 else
7021 tmpmode = DImode;
7022 /* We've requested a size for which
7023 there is no integer mode. Use DImode. */
7024 if (tmpmode == BLKmode)
7025 tmpmode = DImode;
7026 exp [nexps++]
7027 = gen_rtx_EXPR_LIST (VOIDmode,
7028 gen_rtx_REG (tmpmode, *intreg),
7029 GEN_INT (i*8));
7030 intreg++;
7031 break;
7032 case X86_64_SSESF_CLASS:
7033 exp [nexps++]
7034 = gen_rtx_EXPR_LIST (VOIDmode,
7035 gen_rtx_REG (SFmode,
7036 SSE_REGNO (sse_regno)),
7037 GEN_INT (i*8));
7038 sse_regno++;
7039 break;
7040 case X86_64_SSEDF_CLASS:
7041 exp [nexps++]
7042 = gen_rtx_EXPR_LIST (VOIDmode,
7043 gen_rtx_REG (DFmode,
7044 SSE_REGNO (sse_regno)),
7045 GEN_INT (i*8));
7046 sse_regno++;
7047 break;
7048 case X86_64_SSE_CLASS:
7049 pos = i;
7050 switch (n)
7052 case 1:
7053 tmpmode = DImode;
7054 break;
7055 case 2:
7056 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
7058 tmpmode = TImode;
7059 i++;
7061 else
7062 tmpmode = DImode;
7063 break;
7064 case 4:
7065 gcc_assert (i == 0
7066 && regclass[1] == X86_64_SSEUP_CLASS
7067 && regclass[2] == X86_64_SSEUP_CLASS
7068 && regclass[3] == X86_64_SSEUP_CLASS);
7069 tmpmode = OImode;
7070 i += 3;
7071 break;
7072 case 8:
7073 gcc_assert (i == 0
7074 && regclass[1] == X86_64_SSEUP_CLASS
7075 && regclass[2] == X86_64_SSEUP_CLASS
7076 && regclass[3] == X86_64_SSEUP_CLASS
7077 && regclass[4] == X86_64_SSEUP_CLASS
7078 && regclass[5] == X86_64_SSEUP_CLASS
7079 && regclass[6] == X86_64_SSEUP_CLASS
7080 && regclass[7] == X86_64_SSEUP_CLASS);
7081 tmpmode = XImode;
7082 i += 7;
7083 break;
7084 default:
7085 gcc_unreachable ();
7087 exp [nexps++]
7088 = gen_rtx_EXPR_LIST (VOIDmode,
7089 gen_rtx_REG (tmpmode,
7090 SSE_REGNO (sse_regno)),
7091 GEN_INT (pos*8));
7092 sse_regno++;
7093 break;
7094 default:
7095 gcc_unreachable ();
7099 /* Empty aligned struct, union or class. */
7100 if (nexps == 0)
7101 return NULL;
7103 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
7104 for (i = 0; i < nexps; i++)
7105 XVECEXP (ret, 0, i) = exp [i];
7106 return ret;
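/* Editorial sketch, not part of GCC: the PARALLEL built above for the
   struct s example, assuming %rdi is the next free integer register and
   %xmm0 the next SSE register, looks roughly like

     (parallel [(expr_list (reg:SI di)   (const_int 0))
                (expr_list (reg:DF xmm0) (const_int 8))])

   i.e. byte 0 of the value lives in an SImode piece of %rdi and byte 8 in
   a DFmode piece of %xmm0.  */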
7109 /* Update the data in CUM to advance over an argument of mode MODE
7110 and data type TYPE. (TYPE is null for libcalls where that information
7111 may not be available.) */
7113 static void
7114 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7115 const_tree type, HOST_WIDE_INT bytes,
7116 HOST_WIDE_INT words)
7118 switch (mode)
7120 default:
7121 break;
7123 case BLKmode:
7124 if (bytes < 0)
7125 break;
7126 /* FALLTHRU */
7128 case DImode:
7129 case SImode:
7130 case HImode:
7131 case QImode:
7132 cum->words += words;
7133 cum->nregs -= words;
7134 cum->regno += words;
7136 if (cum->nregs <= 0)
7138 cum->nregs = 0;
7139 cum->regno = 0;
7141 break;
7143 case OImode:
7144 /* OImode shouldn't be used directly. */
7145 gcc_unreachable ();
7147 case DFmode:
7148 if (cum->float_in_sse < 2)
7149 break;
7150 case SFmode:
7151 if (cum->float_in_sse < 1)
7152 break;
7153 /* FALLTHRU */
7155 case V8SFmode:
7156 case V8SImode:
7157 case V64QImode:
7158 case V32HImode:
7159 case V16SImode:
7160 case V8DImode:
7161 case V16SFmode:
7162 case V8DFmode:
7163 case V32QImode:
7164 case V16HImode:
7165 case V4DFmode:
7166 case V4DImode:
7167 case TImode:
7168 case V16QImode:
7169 case V8HImode:
7170 case V4SImode:
7171 case V2DImode:
7172 case V4SFmode:
7173 case V2DFmode:
7174 if (!type || !AGGREGATE_TYPE_P (type))
7176 cum->sse_words += words;
7177 cum->sse_nregs -= 1;
7178 cum->sse_regno += 1;
7179 if (cum->sse_nregs <= 0)
7181 cum->sse_nregs = 0;
7182 cum->sse_regno = 0;
7185 break;
7187 case V8QImode:
7188 case V4HImode:
7189 case V2SImode:
7190 case V2SFmode:
7191 case V1TImode:
7192 case V1DImode:
7193 if (!type || !AGGREGATE_TYPE_P (type))
7195 cum->mmx_words += words;
7196 cum->mmx_nregs -= 1;
7197 cum->mmx_regno += 1;
7198 if (cum->mmx_nregs <= 0)
7200 cum->mmx_nregs = 0;
7201 cum->mmx_regno = 0;
7204 break;
7208 static void
7209 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7210 const_tree type, HOST_WIDE_INT words, bool named)
7212 int int_nregs, sse_nregs;
7214 /* Unnamed 512-bit and 256-bit vector mode parameters are passed on the stack. */
7215 if (!named && (VALID_AVX512F_REG_MODE (mode)
7216 || VALID_AVX256_REG_MODE (mode)))
7217 return;
7219 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
7220 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
7222 cum->nregs -= int_nregs;
7223 cum->sse_nregs -= sse_nregs;
7224 cum->regno += int_nregs;
7225 cum->sse_regno += sse_nregs;
7227 else
7229 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
7230 cum->words = (cum->words + align - 1) & ~(align - 1);
7231 cum->words += words;
7235 static void
7236 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
7237 HOST_WIDE_INT words)
7239 /* Otherwise, this should be passed indirectly. */
7240 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
7242 cum->words += words;
7243 if (cum->nregs > 0)
7245 cum->nregs -= 1;
7246 cum->regno += 1;
7250 /* Update the data in CUM to advance over an argument of mode MODE and
7251 data type TYPE. (TYPE is null for libcalls where that information
7252 may not be available.) */
7254 static void
7255 ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
7256 const_tree type, bool named)
7258 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7259 HOST_WIDE_INT bytes, words;
7261 if (mode == BLKmode)
7262 bytes = int_size_in_bytes (type);
7263 else
7264 bytes = GET_MODE_SIZE (mode);
7265 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7267 if (type)
7268 mode = type_natural_mode (type, NULL, false);
7270 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7271 function_arg_advance_ms_64 (cum, bytes, words);
7272 else if (TARGET_64BIT)
7273 function_arg_advance_64 (cum, mode, type, words, named);
7274 else
7275 function_arg_advance_32 (cum, mode, type, bytes, words);
7278 /* Define where to put the arguments to a function.
7279 Value is zero to push the argument on the stack,
7280 or a hard register in which to store the argument.
7282 MODE is the argument's machine mode.
7283 TYPE is the data type of the argument (as a tree).
7284 This is null for libcalls where that information may
7285 not be available.
7286 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7287 the preceding args and about the function being called.
7288 NAMED is nonzero if this argument is a named parameter
7289 (otherwise it is an extra parameter matching an ellipsis). */
7291 static rtx
7292 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7293 enum machine_mode orig_mode, const_tree type,
7294 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
7296 /* Avoid the AL settings for the Unix64 ABI. */
7297 if (mode == VOIDmode)
7298 return constm1_rtx;
7300 switch (mode)
7302 default:
7303 break;
7305 case BLKmode:
7306 if (bytes < 0)
7307 break;
7308 /* FALLTHRU */
7309 case DImode:
7310 case SImode:
7311 case HImode:
7312 case QImode:
7313 if (words <= cum->nregs)
7315 int regno = cum->regno;
7317 /* Fastcall allocates the first two DWORD (SImode) or
7318 smaller arguments to ECX and EDX, provided the argument
7319 is not an aggregate type. */
7320 if (cum->fastcall)
7322 if (mode == BLKmode
7323 || mode == DImode
7324 || (type && AGGREGATE_TYPE_P (type)))
7325 break;
7327 /* ECX, not EAX, is the first allocated register. */
7328 if (regno == AX_REG)
7329 regno = CX_REG;
7331 return gen_rtx_REG (mode, regno);
7333 break;
7335 case DFmode:
7336 if (cum->float_in_sse < 2)
7337 break;
7338 case SFmode:
7339 if (cum->float_in_sse < 1)
7340 break;
7341 /* FALLTHRU */
7342 case TImode:
7343 /* In 32bit, we pass TImode in xmm registers. */
7344 case V16QImode:
7345 case V8HImode:
7346 case V4SImode:
7347 case V2DImode:
7348 case V4SFmode:
7349 case V2DFmode:
7350 if (!type || !AGGREGATE_TYPE_P (type))
7352 if (cum->sse_nregs)
7353 return gen_reg_or_parallel (mode, orig_mode,
7354 cum->sse_regno + FIRST_SSE_REG);
7356 break;
7358 case OImode:
7359 case XImode:
7360 /* OImode and XImode shouldn't be used directly. */
7361 gcc_unreachable ();
7363 case V64QImode:
7364 case V32HImode:
7365 case V16SImode:
7366 case V8DImode:
7367 case V16SFmode:
7368 case V8DFmode:
7369 case V8SFmode:
7370 case V8SImode:
7371 case V32QImode:
7372 case V16HImode:
7373 case V4DFmode:
7374 case V4DImode:
7375 if (!type || !AGGREGATE_TYPE_P (type))
7377 if (cum->sse_nregs)
7378 return gen_reg_or_parallel (mode, orig_mode,
7379 cum->sse_regno + FIRST_SSE_REG);
7381 break;
7383 case V8QImode:
7384 case V4HImode:
7385 case V2SImode:
7386 case V2SFmode:
7387 case V1TImode:
7388 case V1DImode:
7389 if (!type || !AGGREGATE_TYPE_P (type))
7391 if (cum->mmx_nregs)
7392 return gen_reg_or_parallel (mode, orig_mode,
7393 cum->mmx_regno + FIRST_MMX_REG);
7395 break;
7398 return NULL_RTX;
7401 static rtx
7402 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7403 enum machine_mode orig_mode, const_tree type, bool named)
7405 /* Handle the hidden AL argument, which for varargs x86-64 functions
7406 carries the number of SSE registers used. */
7407 if (mode == VOIDmode)
7408 return GEN_INT (cum->maybe_vaarg
7409 ? (cum->sse_nregs < 0
7410 ? X86_64_SSE_REGPARM_MAX
7411 : cum->sse_regno)
7412 : -1);
7414 switch (mode)
7416 default:
7417 break;
7419 case V8SFmode:
7420 case V8SImode:
7421 case V32QImode:
7422 case V16HImode:
7423 case V4DFmode:
7424 case V4DImode:
7425 case V16SFmode:
7426 case V16SImode:
7427 case V64QImode:
7428 case V32HImode:
7429 case V8DFmode:
7430 case V8DImode:
7431 /* Unnamed 256-bit and 512-bit vector mode parameters are passed on the stack. */
7432 if (!named)
7433 return NULL;
7434 break;
7437 return construct_container (mode, orig_mode, type, 0, cum->nregs,
7438 cum->sse_nregs,
7439 &x86_64_int_parameter_registers [cum->regno],
7440 cum->sse_regno);
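/* Editorial note: the VOIDmode case above implements the hidden %al
   argument of the SysV x86-64 ABI.  Before calling a varargs function the
   caller loads %al with (an upper bound on) the number of vector registers
   actually used, which setup_incoming_varargs_64 later tests to decide
   whether the SSE part of the register save area must be spilled.  */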
7443 static rtx
7444 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7445 enum machine_mode orig_mode, bool named,
7446 HOST_WIDE_INT bytes)
7448 unsigned int regno;
7450 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
7451 We use the value -2 to specify that the current function call uses the MS ABI. */
7452 if (mode == VOIDmode)
7453 return GEN_INT (-2);
7455 /* If we've run out of registers, it goes on the stack. */
7456 if (cum->nregs == 0)
7457 return NULL_RTX;
7459 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
7461 /* Only floating point modes are passed in anything but integer regs. */
7462 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
7464 if (named)
7465 regno = cum->regno + FIRST_SSE_REG;
7466 else
7468 rtx t1, t2;
7470 /* Unnamed floating parameters are passed in both the
7471 SSE and integer registers. */
7472 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
7473 t2 = gen_rtx_REG (mode, regno);
7474 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
7475 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
7476 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
7479 /* Handle aggregate types passed in registers. */
7480 if (orig_mode == BLKmode)
7482 if (bytes > 0 && bytes <= 8)
7483 mode = (bytes > 4 ? DImode : SImode);
7484 if (mode == BLKmode)
7485 mode = DImode;
7488 return gen_reg_or_parallel (mode, orig_mode, regno);
7491 /* Return where to put the arguments to a function.
7492 Return zero to push the argument on the stack, or a hard register in which to store the argument.
7494 MODE is the argument's machine mode. TYPE is the data type of the
7495 argument. It is null for libcalls where that information may not be
7496 available. CUM gives information about the preceding args and about
7497 the function being called. NAMED is nonzero if this argument is a
7498 named parameter (otherwise it is an extra parameter matching an
7499 ellipsis). */
7501 static rtx
7502 ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode,
7503 const_tree type, bool named)
7505 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7506 enum machine_mode mode = omode;
7507 HOST_WIDE_INT bytes, words;
7508 rtx arg;
7510 if (mode == BLKmode)
7511 bytes = int_size_in_bytes (type);
7512 else
7513 bytes = GET_MODE_SIZE (mode);
7514 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7516 /* To simplify the code below, represent vector types with a vector mode
7517 even if MMX/SSE are not active. */
7518 if (type && TREE_CODE (type) == VECTOR_TYPE)
7519 mode = type_natural_mode (type, cum, false);
7521 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7522 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
7523 else if (TARGET_64BIT)
7524 arg = function_arg_64 (cum, mode, omode, type, named);
7525 else
7526 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
7528 return arg;
7531 /* A C expression that indicates when an argument must be passed by
7532 reference. If nonzero for an argument, a copy of that argument is
7533 made in memory and a pointer to the argument is passed instead of
7534 the argument itself. The pointer is passed in whatever way is
7535 appropriate for passing a pointer to that type. */
7537 static bool
7538 ix86_pass_by_reference (cumulative_args_t cum_v, enum machine_mode mode,
7539 const_tree type, bool named ATTRIBUTE_UNUSED)
7541 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7543 /* See Windows x64 Software Convention. */
7544 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7546 int msize = (int) GET_MODE_SIZE (mode);
7547 if (type)
7549 /* Arrays are passed by reference. */
7550 if (TREE_CODE (type) == ARRAY_TYPE)
7551 return true;
7553 if (AGGREGATE_TYPE_P (type))
7555 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
7556 are passed by reference. */
7557 msize = int_size_in_bytes (type);
7561 /* __m128 is passed by reference. */
7562 switch (msize) {
7563 case 1: case 2: case 4: case 8:
7564 break;
7565 default:
7566 return true;
7569 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
7570 return 1;
7572 return 0;
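/* Worked example (editorial, not part of GCC) of the Windows x64 rule
   implemented above:

     struct ok  { long long x; };   -- 8 bytes,  passed by value
     struct odd { char c[3]; };     -- 3 bytes,  passed by reference
     __m128 v;                      -- 16 bytes, passed by reference

   Only objects of exactly 1, 2, 4 or 8 bytes travel directly in a register
   or stack slot; everything else is passed as a pointer to a copy made by
   the caller.  */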
7575 /* Return true when TYPE should be 128bit aligned for 32bit argument
7576 passing ABI. XXX: This function is obsolete and is only used for
7577 checking psABI compatibility with previous versions of GCC. */
7579 static bool
7580 ix86_compat_aligned_value_p (const_tree type)
7582 enum machine_mode mode = TYPE_MODE (type);
7583 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
7584 || mode == TDmode
7585 || mode == TFmode
7586 || mode == TCmode)
7587 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
7588 return true;
7589 if (TYPE_ALIGN (type) < 128)
7590 return false;
7592 if (AGGREGATE_TYPE_P (type))
7594 /* Walk the aggregates recursively. */
7595 switch (TREE_CODE (type))
7597 case RECORD_TYPE:
7598 case UNION_TYPE:
7599 case QUAL_UNION_TYPE:
7601 tree field;
7603 /* Walk all the structure fields. */
7604 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7606 if (TREE_CODE (field) == FIELD_DECL
7607 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
7608 return true;
7610 break;
7613 case ARRAY_TYPE:
7614 /* Just for use if some language passes arrays by value. */
7615 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
7616 return true;
7617 break;
7619 default:
7620 gcc_unreachable ();
7623 return false;
7626 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
7627 XXX: This function is obsolete and is only used for checking psABI
7628 compatibility with previous versions of GCC. */
7630 static unsigned int
7631 ix86_compat_function_arg_boundary (enum machine_mode mode,
7632 const_tree type, unsigned int align)
7634 /* In 32bit, only _Decimal128 and __float128 are aligned to their
7635 natural boundaries. */
7636 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
7638 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
7639 make an exception for SSE modes since these require 128bit
7640 alignment.
7642 The handling here differs from field_alignment. ICC aligns MMX
7643 arguments to 4 byte boundaries, while structure fields are aligned
7644 to 8 byte boundaries. */
7645 if (!type)
7647 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
7648 align = PARM_BOUNDARY;
7650 else
7652 if (!ix86_compat_aligned_value_p (type))
7653 align = PARM_BOUNDARY;
7656 if (align > BIGGEST_ALIGNMENT)
7657 align = BIGGEST_ALIGNMENT;
7658 return align;
7661 /* Return true when TYPE should be 128bit aligned for 32bit argument
7662 passing ABI. */
7664 static bool
7665 ix86_contains_aligned_value_p (const_tree type)
7667 enum machine_mode mode = TYPE_MODE (type);
7669 if (mode == XFmode || mode == XCmode)
7670 return false;
7672 if (TYPE_ALIGN (type) < 128)
7673 return false;
7675 if (AGGREGATE_TYPE_P (type))
7677 /* Walk the aggregates recursively. */
7678 switch (TREE_CODE (type))
7680 case RECORD_TYPE:
7681 case UNION_TYPE:
7682 case QUAL_UNION_TYPE:
7684 tree field;
7686 /* Walk all the structure fields. */
7687 for (field = TYPE_FIELDS (type);
7688 field;
7689 field = DECL_CHAIN (field))
7691 if (TREE_CODE (field) == FIELD_DECL
7692 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
7693 return true;
7695 break;
7698 case ARRAY_TYPE:
7699 /* Just for use if some language passes arrays by value. */
7700 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
7701 return true;
7702 break;
7704 default:
7705 gcc_unreachable ();
7708 else
7709 return TYPE_ALIGN (type) >= 128;
7711 return false;
7714 /* Gives the alignment boundary, in bits, of an argument with the
7715 specified mode and type. */
7717 static unsigned int
7718 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
7720 unsigned int align;
7721 if (type)
7723 /* Since the main variant type is used for the call, convert TYPE to
7724 its main variant type. */
7725 type = TYPE_MAIN_VARIANT (type);
7726 align = TYPE_ALIGN (type);
7728 else
7729 align = GET_MODE_ALIGNMENT (mode);
7730 if (align < PARM_BOUNDARY)
7731 align = PARM_BOUNDARY;
7732 else
7734 static bool warned;
7735 unsigned int saved_align = align;
7737 if (!TARGET_64BIT)
7739 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
7740 if (!type)
7742 if (mode == XFmode || mode == XCmode)
7743 align = PARM_BOUNDARY;
7745 else if (!ix86_contains_aligned_value_p (type))
7746 align = PARM_BOUNDARY;
7748 if (align < 128)
7749 align = PARM_BOUNDARY;
7752 if (warn_psabi
7753 && !warned
7754 && align != ix86_compat_function_arg_boundary (mode, type,
7755 saved_align))
7757 warned = true;
7758 inform (input_location,
7759 "The ABI for passing parameters with %d-byte"
7760 " alignment has changed in GCC 4.6",
7761 align / BITS_PER_UNIT);
7765 return align;
7768 /* Return true if N is a possible register number of function value. */
7770 static bool
7771 ix86_function_value_regno_p (const unsigned int regno)
7773 switch (regno)
7775 case AX_REG:
7776 case DX_REG:
7777 return true;
7778 case DI_REG:
7779 case SI_REG:
7780 return TARGET_64BIT && ix86_abi != MS_ABI;
7782 /* Complex values are returned in %st(0)/%st(1) pair. */
7783 case ST0_REG:
7784 case ST1_REG:
7785 /* TODO: The function should depend on current function ABI but
7786 builtins.c would need updating then. Therefore we use the
7787 default ABI. */
7788 if (TARGET_64BIT && ix86_abi == MS_ABI)
7789 return false;
7790 return TARGET_FLOAT_RETURNS_IN_80387;
7792 /* Complex values are returned in %xmm0/%xmm1 pair. */
7793 case XMM0_REG:
7794 case XMM1_REG:
7795 return TARGET_SSE;
7797 case MM0_REG:
7798 if (TARGET_MACHO || TARGET_64BIT)
7799 return false;
7800 return TARGET_MMX;
7803 return false;
7806 /* Define how to find the value returned by a function.
7807 VALTYPE is the data type of the value (as a tree).
7808 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7809 otherwise, FUNC is 0. */
7811 static rtx
7812 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
7813 const_tree fntype, const_tree fn)
7815 unsigned int regno;
7817 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7818 we normally prevent this case when mmx is not available. However
7819 some ABIs may require the result to be returned like DImode. */
7820 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7821 regno = FIRST_MMX_REG;
7823 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7824 we prevent this case when sse is not available. However some ABIs
7825 may require the result to be returned like integer TImode. */
7826 else if (mode == TImode
7827 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7828 regno = FIRST_SSE_REG;
7830 /* 32-byte vector modes in %ymm0. */
7831 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
7832 regno = FIRST_SSE_REG;
7834 /* 64-byte vector modes in %zmm0. */
7835 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
7836 regno = FIRST_SSE_REG;
7838 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7839 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
7840 regno = FIRST_FLOAT_REG;
7841 else
7842 /* Most things go in %eax. */
7843 regno = AX_REG;
7845 /* Override FP return register with %xmm0 for local functions when
7846 SSE math is enabled or for functions with sseregparm attribute. */
7847 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
7849 int sse_level = ix86_function_sseregparm (fntype, fn, false);
7850 if ((sse_level >= 1 && mode == SFmode)
7851 || (sse_level == 2 && mode == DFmode))
7852 regno = FIRST_SSE_REG;
7855 /* OImode shouldn't be used directly. */
7856 gcc_assert (mode != OImode);
7858 return gen_rtx_REG (orig_mode, regno);
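/* Editorial summary of the 32-bit return-register choice above: integers
   and small structs come back in %eax, x87 floats in %st(0), 8-byte
   vectors in %mm0 and 16/32/64-byte vectors in %xmm0/%ymm0/%zmm0.  For
   example, a function returning double normally returns it in %st(0) and
   switches to %xmm0 only when ix86_function_sseregparm reports level 2,
   i.e. roughly for local functions built with SSE math or functions
   carrying the sseregparm attribute.  */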
7861 static rtx
7862 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
7863 const_tree valtype)
7865 rtx ret;
7867 /* Handle libcalls, which don't provide a type node. */
7868 if (valtype == NULL)
7870 unsigned int regno;
7872 switch (mode)
7874 case SFmode:
7875 case SCmode:
7876 case DFmode:
7877 case DCmode:
7878 case TFmode:
7879 case SDmode:
7880 case DDmode:
7881 case TDmode:
7882 regno = FIRST_SSE_REG;
7883 break;
7884 case XFmode:
7885 case XCmode:
7886 regno = FIRST_FLOAT_REG;
7887 break;
7888 case TCmode:
7889 return NULL;
7890 default:
7891 regno = AX_REG;
7894 return gen_rtx_REG (mode, regno);
7896 else if (POINTER_TYPE_P (valtype))
7898 /* Pointers are always returned in word_mode. */
7899 mode = word_mode;
7902 ret = construct_container (mode, orig_mode, valtype, 1,
7903 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
7904 x86_64_int_return_registers, 0);
7906 /* For zero sized structures, construct_container returns NULL, but we
7907 need to keep the rest of the compiler happy by returning a meaningful value. */
7908 if (!ret)
7909 ret = gen_rtx_REG (orig_mode, AX_REG);
7911 return ret;
7914 static rtx
7915 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode,
7916 const_tree valtype)
7918 unsigned int regno = AX_REG;
7920 if (TARGET_SSE)
7922 switch (GET_MODE_SIZE (mode))
7924 case 16:
7925 if (valtype != NULL_TREE
7926 && !VECTOR_INTEGER_TYPE_P (valtype)
7928 && !INTEGRAL_TYPE_P (valtype)
7929 && !VECTOR_FLOAT_TYPE_P (valtype))
7930 break;
7931 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7932 && !COMPLEX_MODE_P (mode))
7933 regno = FIRST_SSE_REG;
7934 break;
7935 case 8:
7936 case 4:
7937 if (mode == SFmode || mode == DFmode)
7938 regno = FIRST_SSE_REG;
7939 break;
7940 default:
7941 break;
7944 return gen_rtx_REG (orig_mode, regno);
7947 static rtx
7948 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
7949 enum machine_mode orig_mode, enum machine_mode mode)
7951 const_tree fn, fntype;
7953 fn = NULL_TREE;
7954 if (fntype_or_decl && DECL_P (fntype_or_decl))
7955 fn = fntype_or_decl;
7956 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
7958 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
7959 return function_value_ms_64 (orig_mode, mode, valtype);
7960 else if (TARGET_64BIT)
7961 return function_value_64 (orig_mode, mode, valtype);
7962 else
7963 return function_value_32 (orig_mode, mode, fntype, fn);
7966 static rtx
7967 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
7968 bool outgoing ATTRIBUTE_UNUSED)
7970 enum machine_mode mode, orig_mode;
7972 orig_mode = TYPE_MODE (valtype);
7973 mode = type_natural_mode (valtype, NULL, true);
7974 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
7977 /* Pointer function arguments and return values are promoted to
7978 word_mode. */
7980 static enum machine_mode
7981 ix86_promote_function_mode (const_tree type, enum machine_mode mode,
7982 int *punsignedp, const_tree fntype,
7983 int for_return)
7985 if (type != NULL_TREE && POINTER_TYPE_P (type))
7987 *punsignedp = POINTERS_EXTEND_UNSIGNED;
7988 return word_mode;
7990 return default_promote_function_mode (type, mode, punsignedp, fntype,
7991 for_return);
7994 /* Return true if a structure, union or array with MODE containing FIELD
7995 should be accessed using BLKmode. */
7997 static bool
7998 ix86_member_type_forces_blk (const_tree field, enum machine_mode mode)
8000 /* Union with XFmode must be in BLKmode. */
8001 return (mode == XFmode
8002 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
8003 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
8007 ix86_libcall_value (enum machine_mode mode)
8009 return ix86_function_value_1 (NULL, NULL, mode, mode);
8012 /* Return true iff type is returned in memory. */
8014 static bool
8015 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
8017 #ifdef SUBTARGET_RETURN_IN_MEMORY
8018 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
8019 #else
8020 const enum machine_mode mode = type_natural_mode (type, NULL, true);
8021 HOST_WIDE_INT size;
8023 if (TARGET_64BIT)
8025 if (ix86_function_type_abi (fntype) == MS_ABI)
8027 size = int_size_in_bytes (type);
8029 /* __m128 is returned in xmm0. */
8030 if ((!type || VECTOR_INTEGER_TYPE_P (type)
8031 || INTEGRAL_TYPE_P (type)
8032 || VECTOR_FLOAT_TYPE_P (type))
8033 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8034 && !COMPLEX_MODE_P (mode)
8035 && (GET_MODE_SIZE (mode) == 16 || size == 16))
8036 return false;
8038 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
8039 return size != 1 && size != 2 && size != 4 && size != 8;
8041 else
8043 int needed_intregs, needed_sseregs;
8045 return examine_argument (mode, type, 1,
8046 &needed_intregs, &needed_sseregs);
8049 else
8051 if (mode == BLKmode)
8052 return true;
8054 size = int_size_in_bytes (type);
8056 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
8057 return false;
8059 if (VECTOR_MODE_P (mode) || mode == TImode)
8061 /* User-created vectors small enough to fit in EAX. */
8062 if (size < 8)
8063 return false;
8065 /* Unless the ABI prescribes otherwise,
8066 MMX/3dNow values are returned in MM0 if available. */
8068 if (size == 8)
8069 return TARGET_VECT8_RETURNS || !TARGET_MMX;
8071 /* SSE values are returned in XMM0 if available. */
8072 if (size == 16)
8073 return !TARGET_SSE;
8075 /* AVX values are returned in YMM0 if available. */
8076 if (size == 32)
8077 return !TARGET_AVX;
8079 /* AVX512F values are returned in ZMM0 if available. */
8080 if (size == 64)
8081 return !TARGET_AVX512F;
8084 if (mode == XFmode)
8085 return false;
8087 if (size > 12)
8088 return true;
8090 /* OImode shouldn't be used directly. */
8091 gcc_assert (mode != OImode);
8093 return false;
8095 #endif
8099 /* Create the va_list data type. */
8101 /* Returns the calling-convention-specific va_list data type.
8102 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
8104 static tree
8105 ix86_build_builtin_va_list_abi (enum calling_abi abi)
8107 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
8109 /* For i386 we use a plain pointer to the argument area. */
8110 if (!TARGET_64BIT || abi == MS_ABI)
8111 return build_pointer_type (char_type_node);
8113 record = lang_hooks.types.make_type (RECORD_TYPE);
8114 type_decl = build_decl (BUILTINS_LOCATION,
8115 TYPE_DECL, get_identifier ("__va_list_tag"), record);
8117 f_gpr = build_decl (BUILTINS_LOCATION,
8118 FIELD_DECL, get_identifier ("gp_offset"),
8119 unsigned_type_node);
8120 f_fpr = build_decl (BUILTINS_LOCATION,
8121 FIELD_DECL, get_identifier ("fp_offset"),
8122 unsigned_type_node);
8123 f_ovf = build_decl (BUILTINS_LOCATION,
8124 FIELD_DECL, get_identifier ("overflow_arg_area"),
8125 ptr_type_node);
8126 f_sav = build_decl (BUILTINS_LOCATION,
8127 FIELD_DECL, get_identifier ("reg_save_area"),
8128 ptr_type_node);
8130 va_list_gpr_counter_field = f_gpr;
8131 va_list_fpr_counter_field = f_fpr;
8133 DECL_FIELD_CONTEXT (f_gpr) = record;
8134 DECL_FIELD_CONTEXT (f_fpr) = record;
8135 DECL_FIELD_CONTEXT (f_ovf) = record;
8136 DECL_FIELD_CONTEXT (f_sav) = record;
8138 TYPE_STUB_DECL (record) = type_decl;
8139 TYPE_NAME (record) = type_decl;
8140 TYPE_FIELDS (record) = f_gpr;
8141 DECL_CHAIN (f_gpr) = f_fpr;
8142 DECL_CHAIN (f_fpr) = f_ovf;
8143 DECL_CHAIN (f_ovf) = f_sav;
8145 layout_type (record);
8147 /* The correct type is an array type of one element. */
8148 return build_array_type (record, build_index_type (size_zero_node));
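/* Editorial sketch, not part of GCC: the record built above matches the
   familiar SysV x86-64 va_list layout, roughly

     typedef struct __va_list_tag {
       unsigned int gp_offset;       -- byte offset of the next GPR slot
       unsigned int fp_offset;       -- byte offset of the next SSE slot
       void *overflow_arg_area;      -- next stack-passed argument
       void *reg_save_area;          -- start of the register save area
     } va_list[1];

   whereas 32-bit targets and the MS ABI use a plain char * instead.  */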
8151 /* Set up the builtin va_list data type and, for 64-bit, the additional
8152 calling-convention-specific va_list data types. */
8154 static tree
8155 ix86_build_builtin_va_list (void)
8157 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
8159 /* Initialize abi specific va_list builtin types. */
8160 if (TARGET_64BIT)
8162 tree t;
8163 if (ix86_abi == MS_ABI)
8165 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
8166 if (TREE_CODE (t) != RECORD_TYPE)
8167 t = build_variant_type_copy (t);
8168 sysv_va_list_type_node = t;
8170 else
8172 t = ret;
8173 if (TREE_CODE (t) != RECORD_TYPE)
8174 t = build_variant_type_copy (t);
8175 sysv_va_list_type_node = t;
8177 if (ix86_abi != MS_ABI)
8179 t = ix86_build_builtin_va_list_abi (MS_ABI);
8180 if (TREE_CODE (t) != RECORD_TYPE)
8181 t = build_variant_type_copy (t);
8182 ms_va_list_type_node = t;
8184 else
8186 t = ret;
8187 if (TREE_CODE (t) != RECORD_TYPE)
8188 t = build_variant_type_copy (t);
8189 ms_va_list_type_node = t;
8193 return ret;
8196 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
8198 static void
8199 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
8201 rtx save_area, mem;
8202 alias_set_type set;
8203 int i, max;
8205 /* GPR size of varargs save area. */
8206 if (cfun->va_list_gpr_size)
8207 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
8208 else
8209 ix86_varargs_gpr_size = 0;
8211 /* FPR size of varargs save area. We don't need it if we don't pass
8212 anything in SSE registers. */
8213 if (TARGET_SSE && cfun->va_list_fpr_size)
8214 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
8215 else
8216 ix86_varargs_fpr_size = 0;
8218 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
8219 return;
8221 save_area = frame_pointer_rtx;
8222 set = get_varargs_alias_set ();
8224 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8225 if (max > X86_64_REGPARM_MAX)
8226 max = X86_64_REGPARM_MAX;
8228 for (i = cum->regno; i < max; i++)
8230 mem = gen_rtx_MEM (word_mode,
8231 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
8232 MEM_NOTRAP_P (mem) = 1;
8233 set_mem_alias_set (mem, set);
8234 emit_move_insn (mem,
8235 gen_rtx_REG (word_mode,
8236 x86_64_int_parameter_registers[i]));
8239 if (ix86_varargs_fpr_size)
8241 enum machine_mode smode;
8242 rtx label, test;
8244 /* Now emit code to save SSE registers. The AX parameter contains the number
8245 of SSE parameter registers used to call this function, though all we
8246 actually check here is the zero/non-zero status. */
8248 label = gen_label_rtx ();
8249 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
8250 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
8251 label));
8253 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
8254 we used movdqa (i.e. TImode) instead? Perhaps even better would
8255 be if we could determine the real mode of the data, via a hook
8256 into pass_stdarg. Ignore all that for now. */
8257 smode = V4SFmode;
8258 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
8259 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
8261 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
8262 if (max > X86_64_SSE_REGPARM_MAX)
8263 max = X86_64_SSE_REGPARM_MAX;
8265 for (i = cum->sse_regno; i < max; ++i)
8267 mem = plus_constant (Pmode, save_area,
8268 i * 16 + ix86_varargs_gpr_size);
8269 mem = gen_rtx_MEM (smode, mem);
8270 MEM_NOTRAP_P (mem) = 1;
8271 set_mem_alias_set (mem, set);
8272 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
8274 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
8277 emit_label (label);
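/* Editorial note on the layout produced above (a sketch, not a normative
   description): when both parts are needed, the register save area holds
   6*8 = 48 bytes of general-purpose registers followed by 8*16 = 128 bytes
   of SSE registers, which is why ix86_va_start below seeds gp_offset with
   n_gpr * 8 and fp_offset with n_fpr * 16 + 48.  */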
8281 static void
8282 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
8284 alias_set_type set = get_varargs_alias_set ();
8285 int i;
8287 /* Reset to zero, as there might be a sysv vaarg used
8288 before. */
8289 ix86_varargs_gpr_size = 0;
8290 ix86_varargs_fpr_size = 0;
8292 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
8294 rtx reg, mem;
8296 mem = gen_rtx_MEM (Pmode,
8297 plus_constant (Pmode, virtual_incoming_args_rtx,
8298 i * UNITS_PER_WORD));
8299 MEM_NOTRAP_P (mem) = 1;
8300 set_mem_alias_set (mem, set);
8302 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
8303 emit_move_insn (mem, reg);
8307 static void
8308 ix86_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
8309 tree type, int *pretend_size ATTRIBUTE_UNUSED,
8310 int no_rtl)
8312 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8313 CUMULATIVE_ARGS next_cum;
8314 tree fntype;
8316 /* This argument doesn't appear to be used anymore, which is good,
8317 because the old code here didn't suppress rtl generation. */
8318 gcc_assert (!no_rtl);
8320 if (!TARGET_64BIT)
8321 return;
8323 fntype = TREE_TYPE (current_function_decl);
8325 /* For varargs, we do not want to skip the dummy va_dcl argument.
8326 For stdargs, we do want to skip the last named argument. */
8327 next_cum = *cum;
8328 if (stdarg_p (fntype))
8329 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8330 true);
8332 if (cum->call_abi == MS_ABI)
8333 setup_incoming_varargs_ms_64 (&next_cum);
8334 else
8335 setup_incoming_varargs_64 (&next_cum);
8338 /* Check whether TYPE is the char * kind of va_list. */
8340 static bool
8341 is_va_list_char_pointer (tree type)
8343 tree canonic;
8345 /* For 32-bit it is always true. */
8346 if (!TARGET_64BIT)
8347 return true;
8348 canonic = ix86_canonical_va_list_type (type);
8349 return (canonic == ms_va_list_type_node
8350 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
8353 /* Implement va_start. */
8355 static void
8356 ix86_va_start (tree valist, rtx nextarg)
8358 HOST_WIDE_INT words, n_gpr, n_fpr;
8359 tree f_gpr, f_fpr, f_ovf, f_sav;
8360 tree gpr, fpr, ovf, sav, t;
8361 tree type;
8362 rtx ovf_rtx;
8364 if (flag_split_stack
8365 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8367 unsigned int scratch_regno;
8369 /* When we are splitting the stack, we can't refer to the stack
8370 arguments using internal_arg_pointer, because they may be on
8371 the old stack. The split stack prologue will arrange to
8372 leave a pointer to the old stack arguments in a scratch
8373 register, which we here copy to a pseudo-register. The split
8374 stack prologue can't set the pseudo-register directly because
8375 it (the prologue) runs before any registers have been saved. */
8377 scratch_regno = split_stack_prologue_scratch_regno ();
8378 if (scratch_regno != INVALID_REGNUM)
8380 rtx reg, seq;
8382 reg = gen_reg_rtx (Pmode);
8383 cfun->machine->split_stack_varargs_pointer = reg;
8385 start_sequence ();
8386 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
8387 seq = get_insns ();
8388 end_sequence ();
8390 push_topmost_sequence ();
8391 emit_insn_after (seq, entry_of_function ());
8392 pop_topmost_sequence ();
8396 /* Only the 64-bit target needs something special. */
8397 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8399 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8400 std_expand_builtin_va_start (valist, nextarg);
8401 else
8403 rtx va_r, next;
8405 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
8406 next = expand_binop (ptr_mode, add_optab,
8407 cfun->machine->split_stack_varargs_pointer,
8408 crtl->args.arg_offset_rtx,
8409 NULL_RTX, 0, OPTAB_LIB_WIDEN);
8410 convert_move (va_r, next, 0);
8412 return;
8415 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8416 f_fpr = DECL_CHAIN (f_gpr);
8417 f_ovf = DECL_CHAIN (f_fpr);
8418 f_sav = DECL_CHAIN (f_ovf);
8420 valist = build_simple_mem_ref (valist);
8421 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
8422 /* The following should be folded into the MEM_REF offset. */
8423 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
8424 f_gpr, NULL_TREE);
8425 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
8426 f_fpr, NULL_TREE);
8427 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
8428 f_ovf, NULL_TREE);
8429 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
8430 f_sav, NULL_TREE);
8432 /* Count number of gp and fp argument registers used. */
8433 words = crtl->args.info.words;
8434 n_gpr = crtl->args.info.regno;
8435 n_fpr = crtl->args.info.sse_regno;
8437 if (cfun->va_list_gpr_size)
8439 type = TREE_TYPE (gpr);
8440 t = build2 (MODIFY_EXPR, type,
8441 gpr, build_int_cst (type, n_gpr * 8));
8442 TREE_SIDE_EFFECTS (t) = 1;
8443 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8446 if (TARGET_SSE && cfun->va_list_fpr_size)
8448 type = TREE_TYPE (fpr);
8449 t = build2 (MODIFY_EXPR, type, fpr,
8450 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
8451 TREE_SIDE_EFFECTS (t) = 1;
8452 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8455 /* Find the overflow area. */
8456 type = TREE_TYPE (ovf);
8457 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8458 ovf_rtx = crtl->args.internal_arg_pointer;
8459 else
8460 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
8461 t = make_tree (type, ovf_rtx);
8462 if (words != 0)
8463 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
8464 t = build2 (MODIFY_EXPR, type, ovf, t);
8465 TREE_SIDE_EFFECTS (t) = 1;
8466 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8468 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
8470 /* Find the register save area.
8471 The function prologue saves it right above the stack frame. */
8472 type = TREE_TYPE (sav);
8473 t = make_tree (type, frame_pointer_rtx);
8474 if (!ix86_varargs_gpr_size)
8475 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
8476 t = build2 (MODIFY_EXPR, type, sav, t);
8477 TREE_SIDE_EFFECTS (t) = 1;
8478 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8482 /* Implement va_arg. */
8484 static tree
8485 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
8486 gimple_seq *post_p)
8488 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
8489 tree f_gpr, f_fpr, f_ovf, f_sav;
8490 tree gpr, fpr, ovf, sav, t;
8491 int size, rsize;
8492 tree lab_false, lab_over = NULL_TREE;
8493 tree addr, t2;
8494 rtx container;
8495 int indirect_p = 0;
8496 tree ptrtype;
8497 enum machine_mode nat_mode;
8498 unsigned int arg_boundary;
8500 /* Only the 64-bit target needs something special. */
8501 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8502 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
8504 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8505 f_fpr = DECL_CHAIN (f_gpr);
8506 f_ovf = DECL_CHAIN (f_fpr);
8507 f_sav = DECL_CHAIN (f_ovf);
8509 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
8510 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
8511 valist = build_va_arg_indirect_ref (valist);
8512 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
8513 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
8514 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
8516 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
8517 if (indirect_p)
8518 type = build_pointer_type (type);
8519 size = int_size_in_bytes (type);
8520 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
8522 nat_mode = type_natural_mode (type, NULL, false);
8523 switch (nat_mode)
8525 case V8SFmode:
8526 case V8SImode:
8527 case V32QImode:
8528 case V16HImode:
8529 case V4DFmode:
8530 case V4DImode:
8531 case V16SFmode:
8532 case V16SImode:
8533 case V64QImode:
8534 case V32HImode:
8535 case V8DFmode:
8536 case V8DImode:
8537 /* Unnamed 256-bit and 512-bit vector mode parameters are passed on the stack. */
8538 if (!TARGET_64BIT_MS_ABI)
8540 container = NULL;
8541 break;
8544 default:
8545 container = construct_container (nat_mode, TYPE_MODE (type),
8546 type, 0, X86_64_REGPARM_MAX,
8547 X86_64_SSE_REGPARM_MAX, intreg,
8549 break;
8552 /* Pull the value out of the saved registers. */
8554 addr = create_tmp_var (ptr_type_node, "addr");
8556 if (container)
8558 int needed_intregs, needed_sseregs;
8559 bool need_temp;
8560 tree int_addr, sse_addr;
8562 lab_false = create_artificial_label (UNKNOWN_LOCATION);
8563 lab_over = create_artificial_label (UNKNOWN_LOCATION);
8565 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
8567 need_temp = (!REG_P (container)
8568 && ((needed_intregs && TYPE_ALIGN (type) > 64)
8569 || TYPE_ALIGN (type) > 128));
8571 /* In case we are passing a structure, verify that it occupies a consecutive
8572 block of the register save area. If not, we need to do moves. */
8573 if (!need_temp && !REG_P (container))
8575 /* Verify that all registers are strictly consecutive. */
8576 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
8578 int i;
8580 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8582 rtx slot = XVECEXP (container, 0, i);
8583 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
8584 || INTVAL (XEXP (slot, 1)) != i * 16)
8585 need_temp = 1;
8588 else
8590 int i;
8592 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8594 rtx slot = XVECEXP (container, 0, i);
8595 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
8596 || INTVAL (XEXP (slot, 1)) != i * 8)
8597 need_temp = 1;
8601 if (!need_temp)
8603 int_addr = addr;
8604 sse_addr = addr;
8606 else
8608 int_addr = create_tmp_var (ptr_type_node, "int_addr");
8609 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
8612 /* First ensure that we fit completely in registers. */
8613 if (needed_intregs)
8615 t = build_int_cst (TREE_TYPE (gpr),
8616 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
8617 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
8618 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8619 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8620 gimplify_and_add (t, pre_p);
8622 if (needed_sseregs)
8624 t = build_int_cst (TREE_TYPE (fpr),
8625 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
8626 + X86_64_REGPARM_MAX * 8);
8627 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
8628 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8629 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8630 gimplify_and_add (t, pre_p);
8633 /* Compute index to start of area used for integer regs. */
8634 if (needed_intregs)
8636 /* int_addr = gpr + sav; */
8637 t = fold_build_pointer_plus (sav, gpr);
8638 gimplify_assign (int_addr, t, pre_p);
8640 if (needed_sseregs)
8642 /* sse_addr = fpr + sav; */
8643 t = fold_build_pointer_plus (sav, fpr);
8644 gimplify_assign (sse_addr, t, pre_p);
8646 if (need_temp)
8648 int i, prev_size = 0;
8649 tree temp = create_tmp_var (type, "va_arg_tmp");
8651 /* addr = &temp; */
8652 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
8653 gimplify_assign (addr, t, pre_p);
8655 for (i = 0; i < XVECLEN (container, 0); i++)
8657 rtx slot = XVECEXP (container, 0, i);
8658 rtx reg = XEXP (slot, 0);
8659 enum machine_mode mode = GET_MODE (reg);
8660 tree piece_type;
8661 tree addr_type;
8662 tree daddr_type;
8663 tree src_addr, src;
8664 int src_offset;
8665 tree dest_addr, dest;
8666 int cur_size = GET_MODE_SIZE (mode);
8668 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
8669 prev_size = INTVAL (XEXP (slot, 1));
8670 if (prev_size + cur_size > size)
8672 cur_size = size - prev_size;
8673 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
8674 if (mode == BLKmode)
8675 mode = QImode;
8677 piece_type = lang_hooks.types.type_for_mode (mode, 1);
8678 if (mode == GET_MODE (reg))
8679 addr_type = build_pointer_type (piece_type);
8680 else
8681 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8682 true);
8683 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8684 true);
8686 if (SSE_REGNO_P (REGNO (reg)))
8688 src_addr = sse_addr;
8689 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
8691 else
8693 src_addr = int_addr;
8694 src_offset = REGNO (reg) * 8;
8696 src_addr = fold_convert (addr_type, src_addr);
8697 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
8699 dest_addr = fold_convert (daddr_type, addr);
8700 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
8701 if (cur_size == GET_MODE_SIZE (mode))
8703 src = build_va_arg_indirect_ref (src_addr);
8704 dest = build_va_arg_indirect_ref (dest_addr);
8706 gimplify_assign (dest, src, pre_p);
8708 else
8710 tree copy
8711 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
8712 3, dest_addr, src_addr,
8713 size_int (cur_size));
8714 gimplify_and_add (copy, pre_p);
8716 prev_size += cur_size;
8720 if (needed_intregs)
8722 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
8723 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
8724 gimplify_assign (gpr, t, pre_p);
8727 if (needed_sseregs)
8729 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
8730 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
8731 gimplify_assign (fpr, t, pre_p);
8734 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
8736 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
8739 /* ... otherwise out of the overflow area. */
8741 /* When the caller aligns a parameter on the stack, an alignment
8742 beyond MAX_SUPPORTED_STACK_ALIGNMENT is clamped to
8743 MAX_SUPPORTED_STACK_ALIGNMENT. Match the callee here with the
8744 caller. */
8745 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
8746 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
8747 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
8749 /* Care for on-stack alignment if needed. */
8750 if (arg_boundary <= 64 || size == 0)
8751 t = ovf;
8752 else
8754 HOST_WIDE_INT align = arg_boundary / 8;
8755 t = fold_build_pointer_plus_hwi (ovf, align - 1);
8756 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
8757 build_int_cst (TREE_TYPE (t), -align));
8760 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
8761 gimplify_assign (addr, t, pre_p);
8763 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
8764 gimplify_assign (unshare_expr (ovf), t, pre_p);
8766 if (container)
8767 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
8769 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
8770 addr = fold_convert (ptrtype, addr);
8772 if (indirect_p)
8773 addr = build_va_arg_indirect_ref (addr);
8774 return build_va_arg_indirect_ref (addr);
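/* A sketch of the SysV x86-64 va_list layout the gimplification above
   works against, assuming the usual values X86_64_REGPARM_MAX == 6 and
   X86_64_SSE_REGPARM_MAX == 8:

       reg_save_area +   0 ..  47   rdi, rsi, rdx, rcx, r8, r9  (8 bytes each)
       reg_save_area +  48 .. 175   xmm0 .. xmm7                (16 bytes each)

   gp_offset counts bytes consumed in the first block and fp_offset in the
   second, so an argument needing one integer register is fetched from the
   save area while gp_offset < 6 * 8, and one needing one SSE register
   while fp_offset < 6 * 8 + 8 * 16; otherwise it comes from the overflow
   area, whose pointer is first rounded up as (ovf + align - 1) & -align,
   e.g. 0x...e8 with a 16-byte alignment rounds up to 0x...f0.  */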
8777 /* Return true if OPNUM's MEM should be matched
8778 in movabs* patterns. */
8780 bool
8781 ix86_check_movabs (rtx insn, int opnum)
8783 rtx set, mem;
8785 set = PATTERN (insn);
8786 if (GET_CODE (set) == PARALLEL)
8787 set = XVECEXP (set, 0, 0);
8788 gcc_assert (GET_CODE (set) == SET);
8789 mem = XEXP (set, opnum);
8790 while (GET_CODE (mem) == SUBREG)
8791 mem = SUBREG_REG (mem);
8792 gcc_assert (MEM_P (mem));
8793 return volatile_ok || !MEM_VOLATILE_P (mem);
8796 /* Initialize the table of extra 80387 mathematical constants. */
8798 static void
8799 init_ext_80387_constants (void)
8801 static const char * cst[5] =
8803 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8804 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8805 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8806 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8807 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8809 int i;
8811 for (i = 0; i < 5; i++)
8813 real_from_string (&ext_80387_constants_table[i], cst[i]);
8814 /* Ensure each constant is rounded to XFmode precision. */
8815 real_convert (&ext_80387_constants_table[i],
8816 XFmode, &ext_80387_constants_table[i]);
8819 ext_80387_constants_init = 1;
8822 /* Return non-zero if the constant is something that
8823 can be loaded with a special instruction. */
8826 standard_80387_constant_p (rtx x)
8828 enum machine_mode mode = GET_MODE (x);
8830 REAL_VALUE_TYPE r;
8832 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
8833 return -1;
8835 if (x == CONST0_RTX (mode))
8836 return 1;
8837 if (x == CONST1_RTX (mode))
8838 return 2;
8840 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8842 /* For XFmode constants, try to find a special 80387 instruction when
8843 optimizing for size or on those CPUs that benefit from them. */
8844 if (mode == XFmode
8845 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
8847 int i;
8849 if (! ext_80387_constants_init)
8850 init_ext_80387_constants ();
8852 for (i = 0; i < 5; i++)
8853 if (real_identical (&r, &ext_80387_constants_table[i]))
8854 return i + 3;
8857 /* A load of the constant -0.0 or -1.0 will be split into an
8858 fldz;fchs or fld1;fchs sequence. */
8859 if (real_isnegzero (&r))
8860 return 8;
8861 if (real_identical (&r, &dconstm1))
8862 return 9;
8864 return 0;
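/* To make the encoding concrete: -1 means X is not an 80387 float
   constant at all, 0 means it needs an ordinary load, and the positive
   values select the special sequences handled below:

       1  fldz   (0.0)            2  fld1   (1.0)
       3  fldlg2 (log10 2)        4  fldln2 (ln 2)
       5  fldl2e (log2 e)         6  fldl2t (log2 10)
       7  fldpi  (pi)             8, 9  fldz/fld1 then fchs (-0.0, -1.0)  */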
8867 /* Return the opcode of the special instruction to be used to load
8868 the constant X. */
8870 const char *
8871 standard_80387_constant_opcode (rtx x)
8873 switch (standard_80387_constant_p (x))
8875 case 1:
8876 return "fldz";
8877 case 2:
8878 return "fld1";
8879 case 3:
8880 return "fldlg2";
8881 case 4:
8882 return "fldln2";
8883 case 5:
8884 return "fldl2e";
8885 case 6:
8886 return "fldl2t";
8887 case 7:
8888 return "fldpi";
8889 case 8:
8890 case 9:
8891 return "#";
8892 default:
8893 gcc_unreachable ();
8897 /* Return the CONST_DOUBLE representing the 80387 constant that is
8898 loaded by the specified special instruction. The argument IDX
8899 matches the return value from standard_80387_constant_p. */
8902 standard_80387_constant_rtx (int idx)
8904 int i;
8906 if (! ext_80387_constants_init)
8907 init_ext_80387_constants ();
8909 switch (idx)
8911 case 3:
8912 case 4:
8913 case 5:
8914 case 6:
8915 case 7:
8916 i = idx - 3;
8917 break;
8919 default:
8920 gcc_unreachable ();
8923 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
8924 XFmode);
8927 /* Return 1 if X is all 0s and 2 if X is all 1s
8928 in a supported SSE/AVX vector mode. */
8931 standard_sse_constant_p (rtx x)
8933 enum machine_mode mode = GET_MODE (x);
8935 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
8936 return 1;
8937 if (vector_all_ones_operand (x, mode))
8938 switch (mode)
8940 case V16QImode:
8941 case V8HImode:
8942 case V4SImode:
8943 case V2DImode:
8944 if (TARGET_SSE2)
8945 return 2;
8946 case V32QImode:
8947 case V16HImode:
8948 case V8SImode:
8949 case V4DImode:
8950 if (TARGET_AVX2)
8951 return 2;
8952 case V64QImode:
8953 case V32HImode:
8954 case V16SImode:
8955 case V8DImode:
8956 if (TARGET_AVX512F)
8957 return 2;
8958 default:
8959 break;
8962 return 0;
8965 /* Return the opcode of the special instruction to be used to load
8966 the constant X. */
8968 const char *
8969 standard_sse_constant_opcode (rtx insn, rtx x)
8971 switch (standard_sse_constant_p (x))
8973 case 1:
8974 switch (get_attr_mode (insn))
8976 case MODE_XI:
8977 case MODE_V16SF:
8978 return "vpxord\t%g0, %g0, %g0";
8979 case MODE_V8DF:
8980 return "vpxorq\t%g0, %g0, %g0";
8981 case MODE_TI:
8982 return "%vpxor\t%0, %d0";
8983 case MODE_V2DF:
8984 return "%vxorpd\t%0, %d0";
8985 case MODE_V4SF:
8986 return "%vxorps\t%0, %d0";
8988 case MODE_OI:
8989 return "vpxor\t%x0, %x0, %x0";
8990 case MODE_V4DF:
8991 return "vxorpd\t%x0, %x0, %x0";
8992 case MODE_V8SF:
8993 return "vxorps\t%x0, %x0, %x0";
8995 default:
8996 break;
8999 case 2:
9000 if (get_attr_mode (insn) == MODE_XI
9001 || get_attr_mode (insn) == MODE_V8DF
9002 || get_attr_mode (insn) == MODE_V16SF)
9003 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
9004 if (TARGET_AVX)
9005 return "vpcmpeqd\t%0, %0, %0";
9006 else
9007 return "pcmpeqd\t%0, %0";
9009 default:
9010 break;
9012 gcc_unreachable ();
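/* Taken together, the two routines above mean that, for example, an
   all-zeros V4SFmode constant returns 1 and is emitted as
   "%vxorps\t%0, %d0" (the %v prefix adds the AVX "v" when applicable),
   while an all-ones V2DImode constant on a non-AVX SSE2 target returns 2
   and is emitted as "pcmpeqd\t%0, %0".  */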
9015 /* Return true if OP contains a symbol reference. */
9017 bool
9018 symbolic_reference_mentioned_p (rtx op)
9020 const char *fmt;
9021 int i;
9023 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
9024 return true;
9026 fmt = GET_RTX_FORMAT (GET_CODE (op));
9027 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
9029 if (fmt[i] == 'E')
9031 int j;
9033 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
9034 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
9035 return true;
9038 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
9039 return true;
9042 return false;
9045 /* Return true if it is appropriate to emit `ret' instructions in the
9046 body of a function. Do this only if the epilogue is simple, needing a
9047 couple of insns. Prior to reloading, we can't tell how many registers
9048 must be saved, so return false then. Return false if there is no frame
9049 marker to de-allocate. */
9051 bool
9052 ix86_can_use_return_insn_p (void)
9054 struct ix86_frame frame;
9056 if (! reload_completed || frame_pointer_needed)
9057 return 0;
9059 /* Don't allow more than 32k pop, since that's all we can do
9060 with one instruction. */
9061 if (crtl->args.pops_args && crtl->args.size >= 32768)
9062 return 0;
9064 ix86_compute_frame_layout (&frame);
9065 return (frame.stack_pointer_offset == UNITS_PER_WORD
9066 && (frame.nregs + frame.nsseregs) == 0);
9069 /* Value should be nonzero if functions must have frame pointers.
9070 Zero means the frame pointer need not be set up (and parms may
9071 be accessed via the stack pointer) in functions that seem suitable. */
9073 static bool
9074 ix86_frame_pointer_required (void)
9076 /* If we accessed previous frames, then the generated code expects
9077 to be able to access the saved ebp value in our frame. */
9078 if (cfun->machine->accesses_prev_frame)
9079 return true;
9081 /* Several x86 OSes need a frame pointer for other reasons,
9082 usually pertaining to setjmp. */
9083 if (SUBTARGET_FRAME_POINTER_REQUIRED)
9084 return true;
9086 /* For older 32-bit runtimes setjmp requires a valid frame pointer. */
9087 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
9088 return true;
9090 /* With Win64 SEH, very large frames need a frame pointer as the maximum
9091 stack allocation is 4GB. */
9092 if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
9093 return true;
9095 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
9096 turns off the frame pointer by default. Turn it back on now if
9097 we've not got a leaf function. */
9098 if (TARGET_OMIT_LEAF_FRAME_POINTER
9099 && (!crtl->is_leaf
9100 || ix86_current_function_calls_tls_descriptor))
9101 return true;
9103 if (crtl->profile && !flag_fentry)
9104 return true;
9106 return false;
9109 /* Record that the current function accesses previous call frames. */
9111 void
9112 ix86_setup_frame_addresses (void)
9114 cfun->machine->accesses_prev_frame = 1;
9117 #ifndef USE_HIDDEN_LINKONCE
9118 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
9119 # define USE_HIDDEN_LINKONCE 1
9120 # else
9121 # define USE_HIDDEN_LINKONCE 0
9122 # endif
9123 #endif
9125 static int pic_labels_used;
9127 /* Fills in the label name that should be used for a pc thunk for
9128 the given register. */
9130 static void
9131 get_pc_thunk_name (char name[32], unsigned int regno)
9133 gcc_assert (!TARGET_64BIT);
9135 if (USE_HIDDEN_LINKONCE)
9136 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
9137 else
9138 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
9142 /* This function generates code for -fpic that loads %ebx with
9143 the return address of the caller and then returns. */
9145 static void
9146 ix86_code_end (void)
9148 rtx xops[2];
9149 int regno;
9151 for (regno = AX_REG; regno <= SP_REG; regno++)
9153 char name[32];
9154 tree decl;
9156 if (!(pic_labels_used & (1 << regno)))
9157 continue;
9159 get_pc_thunk_name (name, regno);
9161 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
9162 get_identifier (name),
9163 build_function_type_list (void_type_node, NULL_TREE));
9164 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
9165 NULL_TREE, void_type_node);
9166 TREE_PUBLIC (decl) = 1;
9167 TREE_STATIC (decl) = 1;
9168 DECL_IGNORED_P (decl) = 1;
9170 #if TARGET_MACHO
9171 if (TARGET_MACHO)
9173 switch_to_section (darwin_sections[text_coal_section]);
9174 fputs ("\t.weak_definition\t", asm_out_file);
9175 assemble_name (asm_out_file, name);
9176 fputs ("\n\t.private_extern\t", asm_out_file);
9177 assemble_name (asm_out_file, name);
9178 putc ('\n', asm_out_file);
9179 ASM_OUTPUT_LABEL (asm_out_file, name);
9180 DECL_WEAK (decl) = 1;
9182 else
9183 #endif
9184 if (USE_HIDDEN_LINKONCE)
9186 cgraph_create_node (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
9188 targetm.asm_out.unique_section (decl, 0);
9189 switch_to_section (get_named_section (decl, NULL, 0));
9191 targetm.asm_out.globalize_label (asm_out_file, name);
9192 fputs ("\t.hidden\t", asm_out_file);
9193 assemble_name (asm_out_file, name);
9194 putc ('\n', asm_out_file);
9195 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
9197 else
9199 switch_to_section (text_section);
9200 ASM_OUTPUT_LABEL (asm_out_file, name);
9203 DECL_INITIAL (decl) = make_node (BLOCK);
9204 current_function_decl = decl;
9205 init_function_start (decl);
9206 first_function_block_is_cold = false;
9207 /* Make sure unwind info is emitted for the thunk if needed. */
9208 final_start_function (emit_barrier (), asm_out_file, 1);
9210 /* Pad stack IP move with 4 instructions (two NOPs count
9211 as one instruction). */
9212 if (TARGET_PAD_SHORT_FUNCTION)
9214 int i = 8;
9216 while (i--)
9217 fputs ("\tnop\n", asm_out_file);
9220 xops[0] = gen_rtx_REG (Pmode, regno);
9221 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
9222 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
9223 fputs ("\tret\n", asm_out_file);
9224 final_end_function ();
9225 init_insn_lengths ();
9226 free_after_compilation (cfun);
9227 set_cfun (NULL);
9228 current_function_decl = NULL;
9231 if (flag_split_stack)
9232 file_end_indicate_split_stack ();
9235 /* Emit code for the SET_GOT patterns. */
9237 const char *
9238 output_set_got (rtx dest, rtx label)
9240 rtx xops[3];
9242 xops[0] = dest;
9244 if (TARGET_VXWORKS_RTP && flag_pic)
9246 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
9247 xops[2] = gen_rtx_MEM (Pmode,
9248 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
9249 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
9251 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
9252 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
9253 an unadorned address. */
9254 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
9255 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
9256 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
9257 return "";
9260 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
9262 if (!flag_pic)
9264 if (TARGET_MACHO)
9265 /* We don't need a pic base, we're not producing pic. */
9266 gcc_unreachable ();
9268 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
9269 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
9270 targetm.asm_out.internal_label (asm_out_file, "L",
9271 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
9273 else
9275 char name[32];
9276 get_pc_thunk_name (name, REGNO (dest));
9277 pic_labels_used |= 1 << REGNO (dest);
9279 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
9280 xops[2] = gen_rtx_MEM (QImode, xops[2]);
9281 output_asm_insn ("call\t%X2", xops);
9283 #if TARGET_MACHO
9284 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
9285 This is what will be referenced by the Mach-O PIC subsystem. */
9286 if (machopic_should_output_picbase_label () || !label)
9287 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
9289 /* When we are restoring the pic base at the site of a nonlocal label,
9290 and we decided to emit the pic base above, we will still output a
9291 local label used for calculating the correction offset (even though
9292 the offset will be 0 in that case). */
9293 if (label)
9294 targetm.asm_out.internal_label (asm_out_file, "L",
9295 CODE_LABEL_NUMBER (label));
9296 #endif
9299 if (!TARGET_MACHO)
9300 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
9302 return "";
9305 /* Generate a "push" pattern for input ARG. */
9307 static rtx
9308 gen_push (rtx arg)
9310 struct machine_function *m = cfun->machine;
9312 if (m->fs.cfa_reg == stack_pointer_rtx)
9313 m->fs.cfa_offset += UNITS_PER_WORD;
9314 m->fs.sp_offset += UNITS_PER_WORD;
9316 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9317 arg = gen_rtx_REG (word_mode, REGNO (arg));
9319 return gen_rtx_SET (VOIDmode,
9320 gen_rtx_MEM (word_mode,
9321 gen_rtx_PRE_DEC (Pmode,
9322 stack_pointer_rtx)),
9323 arg);
9326 /* Generate a "pop" pattern for input ARG. */
9328 static rtx
9329 gen_pop (rtx arg)
9331 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9332 arg = gen_rtx_REG (word_mode, REGNO (arg));
9334 return gen_rtx_SET (VOIDmode,
9335 arg,
9336 gen_rtx_MEM (word_mode,
9337 gen_rtx_POST_INC (Pmode,
9338 stack_pointer_rtx)));
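/* In RTL terms the two helpers above produce, on a 64-bit target and for
   a general register REG (a sketch only):

       push:  (set (mem:DI (pre_dec:DI (reg:DI sp))) (reg:DI REG))
       pop:   (set (reg:DI REG) (mem:DI (post_inc:DI (reg:DI sp))))

   gen_push additionally advances the tracked sp_offset (and cfa_offset
   while the CFA is still the stack pointer) by UNITS_PER_WORD so that the
   CFI notes emitted later stay consistent.  */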
9341 /* Return >= 0 if there is an unused call-clobbered register available
9342 for the entire function. */
9344 static unsigned int
9345 ix86_select_alt_pic_regnum (void)
9347 if (crtl->is_leaf
9348 && !crtl->profile
9349 && !ix86_current_function_calls_tls_descriptor)
9351 int i, drap;
9352 /* Can't use the same register for both PIC and DRAP. */
9353 if (crtl->drap_reg)
9354 drap = REGNO (crtl->drap_reg);
9355 else
9356 drap = -1;
9357 for (i = 2; i >= 0; --i)
9358 if (i != drap && !df_regs_ever_live_p (i))
9359 return i;
9362 return INVALID_REGNUM;
9365 /* Return TRUE if we need to save REGNO. */
9367 static bool
9368 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
9370 if (pic_offset_table_rtx
9371 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
9372 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9373 || crtl->profile
9374 || crtl->calls_eh_return
9375 || crtl->uses_const_pool
9376 || cfun->has_nonlocal_label))
9377 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
9379 if (crtl->calls_eh_return && maybe_eh_return)
9381 unsigned i;
9382 for (i = 0; ; i++)
9384 unsigned test = EH_RETURN_DATA_REGNO (i);
9385 if (test == INVALID_REGNUM)
9386 break;
9387 if (test == regno)
9388 return true;
9392 if (crtl->drap_reg
9393 && regno == REGNO (crtl->drap_reg)
9394 && !cfun->machine->no_drap_save_restore)
9395 return true;
9397 return (df_regs_ever_live_p (regno)
9398 && !call_used_regs[regno]
9399 && !fixed_regs[regno]
9400 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
9403 /* Return the number of saved general purpose registers. */
9405 static int
9406 ix86_nsaved_regs (void)
9408 int nregs = 0;
9409 int regno;
9411 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9412 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9413 nregs ++;
9414 return nregs;
9417 /* Return the number of saved SSE registers. */
9419 static int
9420 ix86_nsaved_sseregs (void)
9422 int nregs = 0;
9423 int regno;
9425 if (!TARGET_64BIT_MS_ABI)
9426 return 0;
9427 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9428 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9429 nregs ++;
9430 return nregs;
9433 /* Given FROM and TO register numbers, say whether this elimination is
9434 allowed. If stack alignment is needed, we can only replace argument
9435 pointer with hard frame pointer, or replace frame pointer with stack
9436 pointer. Otherwise, frame pointer elimination is automatically
9437 handled and all other eliminations are valid. */
9439 static bool
9440 ix86_can_eliminate (const int from, const int to)
9442 if (stack_realign_fp)
9443 return ((from == ARG_POINTER_REGNUM
9444 && to == HARD_FRAME_POINTER_REGNUM)
9445 || (from == FRAME_POINTER_REGNUM
9446 && to == STACK_POINTER_REGNUM));
9447 else
9448 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
9451 /* Return the offset between two registers, one to be eliminated, and the other
9452 its replacement, at the start of a routine. */
9454 HOST_WIDE_INT
9455 ix86_initial_elimination_offset (int from, int to)
9457 struct ix86_frame frame;
9458 ix86_compute_frame_layout (&frame);
9460 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9461 return frame.hard_frame_pointer_offset;
9462 else if (from == FRAME_POINTER_REGNUM
9463 && to == HARD_FRAME_POINTER_REGNUM)
9464 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
9465 else
9467 gcc_assert (to == STACK_POINTER_REGNUM);
9469 if (from == ARG_POINTER_REGNUM)
9470 return frame.stack_pointer_offset;
9472 gcc_assert (from == FRAME_POINTER_REGNUM);
9473 return frame.stack_pointer_offset - frame.frame_pointer_offset;
9477 /* In a dynamically-aligned function, we can't know the offset from
9478 stack pointer to frame pointer, so we must ensure that setjmp
9479 eliminates fp against the hard fp (%ebp) rather than trying to
9480 index from %esp up to the top of the frame across a gap that is
9481 of unknown (at compile-time) size. */
9482 static rtx
9483 ix86_builtin_setjmp_frame_value (void)
9485 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
9488 /* When using -fsplit-stack, the allocation routines set a field in
9489 the TCB to the bottom of the stack plus this much space, measured
9490 in bytes. */
9492 #define SPLIT_STACK_AVAILABLE 256
9494 /* Fill in structure ix86_frame describing the frame of the function currently being compiled. */
9496 static void
9497 ix86_compute_frame_layout (struct ix86_frame *frame)
9499 unsigned HOST_WIDE_INT stack_alignment_needed;
9500 HOST_WIDE_INT offset;
9501 unsigned HOST_WIDE_INT preferred_alignment;
9502 HOST_WIDE_INT size = get_frame_size ();
9503 HOST_WIDE_INT to_allocate;
9505 frame->nregs = ix86_nsaved_regs ();
9506 frame->nsseregs = ix86_nsaved_sseregs ();
9508 /* The 64-bit MS ABI seems to require the stack alignment to always be 16,
9509 except for function prologues and leaf functions. */
9510 if ((TARGET_64BIT_MS_ABI && crtl->preferred_stack_boundary < 128)
9511 && (!crtl->is_leaf || cfun->calls_alloca != 0
9512 || ix86_current_function_calls_tls_descriptor))
9514 crtl->preferred_stack_boundary = 128;
9515 crtl->stack_alignment_needed = 128;
9517 /* preferred_stack_boundary is never updated for calls
9518 expanded from a TLS descriptor. Update it here. We don't update it in
9519 the expand stage because, according to the comments before
9520 ix86_current_function_calls_tls_descriptor, TLS calls may be optimized
9521 away. */
9522 else if (ix86_current_function_calls_tls_descriptor
9523 && crtl->preferred_stack_boundary < PREFERRED_STACK_BOUNDARY)
9525 crtl->preferred_stack_boundary = PREFERRED_STACK_BOUNDARY;
9526 if (crtl->stack_alignment_needed < PREFERRED_STACK_BOUNDARY)
9527 crtl->stack_alignment_needed = PREFERRED_STACK_BOUNDARY;
9530 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
9531 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
9533 gcc_assert (!size || stack_alignment_needed);
9534 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
9535 gcc_assert (preferred_alignment <= stack_alignment_needed);
9537 /* For SEH we have to limit the amount of code movement into the prologue.
9538 At present we do this via a BLOCKAGE, at which point there's very little
9539 scheduling that can be done, which means that there's very little point
9540 in doing anything except PUSHs. */
9541 if (TARGET_SEH)
9542 cfun->machine->use_fast_prologue_epilogue = false;
9544 /* During reload iteration the number of registers saved can change.
9545 Recompute the value as needed. Do not recompute when the number of registers
9546 didn't change, as reload makes multiple calls to the function and does not
9547 expect the decision to change within a single iteration. */
9548 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun))
9549 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
9551 int count = frame->nregs;
9552 struct cgraph_node *node = cgraph_get_node (current_function_decl);
9554 cfun->machine->use_fast_prologue_epilogue_nregs = count;
9556 /* The fast prologue uses move instead of push to save registers. This
9557 is significantly longer, but also executes faster as modern hardware
9558 can execute the moves in parallel, but can't do that for push/pop.
9560 Be careful about choosing which prologue to emit: when the function takes
9561 many instructions to execute, we may use the slow version, as well as when
9562 the function is known to be outside a hot spot (this is known with
9563 feedback only). Weight the size of the function by the number of registers
9564 to save, as it is cheap to use one or two push instructions but very
9565 slow to use many of them. */
9566 if (count)
9567 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
9568 if (node->frequency < NODE_FREQUENCY_NORMAL
9569 || (flag_branch_probabilities
9570 && node->frequency < NODE_FREQUENCY_HOT))
9571 cfun->machine->use_fast_prologue_epilogue = false;
9572 else
9573 cfun->machine->use_fast_prologue_epilogue
9574 = !expensive_function_p (count);
9577 frame->save_regs_using_mov
9578 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
9579 /* If static stack checking is enabled and done with probes,
9580 the registers need to be saved before allocating the frame. */
9581 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
9583 /* Skip return address. */
9584 offset = UNITS_PER_WORD;
9586 /* Skip pushed static chain. */
9587 if (ix86_static_chain_on_stack)
9588 offset += UNITS_PER_WORD;
9590 /* Skip saved base pointer. */
9591 if (frame_pointer_needed)
9592 offset += UNITS_PER_WORD;
9593 frame->hfp_save_offset = offset;
9595 /* The traditional frame pointer location is at the top of the frame. */
9596 frame->hard_frame_pointer_offset = offset;
9598 /* Register save area */
9599 offset += frame->nregs * UNITS_PER_WORD;
9600 frame->reg_save_offset = offset;
9602 /* On SEH targets, registers are pushed just before the frame pointer
9603 location. */
9604 if (TARGET_SEH)
9605 frame->hard_frame_pointer_offset = offset;
9607 /* Align and set SSE register save area. */
9608 if (frame->nsseregs)
9610 /* The only ABI that has saved SSE registers (Win64) also has a
9611 16-byte aligned default stack, and thus we don't need to be
9612 within the re-aligned local stack frame to save them. */
9613 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
9614 offset = (offset + 16 - 1) & -16;
9615 offset += frame->nsseregs * 16;
9617 frame->sse_reg_save_offset = offset;
9619 /* The re-aligned stack starts here. Values before this point are not
9620 directly comparable with values below this point. In order to make
9621 sure that no value happens to be the same before and after, force
9622 the alignment computation below to add a non-zero value. */
9623 if (stack_realign_fp)
9624 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
9626 /* Va-arg area */
9627 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
9628 offset += frame->va_arg_size;
9630 /* Align start of frame for local function. */
9631 if (stack_realign_fp
9632 || offset != frame->sse_reg_save_offset
9633 || size != 0
9634 || !crtl->is_leaf
9635 || cfun->calls_alloca
9636 || ix86_current_function_calls_tls_descriptor)
9637 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
9639 /* Frame pointer points here. */
9640 frame->frame_pointer_offset = offset;
9642 offset += size;
9644 /* Add the outgoing arguments area. It can be skipped if we eliminated
9645 all the function calls as dead code.
9646 Skipping is however impossible when the function calls alloca. The alloca
9647 expander assumes that the last crtl->outgoing_args_size bytes
9648 of the stack frame are unused. */
9649 if (ACCUMULATE_OUTGOING_ARGS
9650 && (!crtl->is_leaf || cfun->calls_alloca
9651 || ix86_current_function_calls_tls_descriptor))
9653 offset += crtl->outgoing_args_size;
9654 frame->outgoing_arguments_size = crtl->outgoing_args_size;
9656 else
9657 frame->outgoing_arguments_size = 0;
9659 /* Align stack boundary. Only needed if we're calling another function
9660 or using alloca. */
9661 if (!crtl->is_leaf || cfun->calls_alloca
9662 || ix86_current_function_calls_tls_descriptor)
9663 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
9665 /* We've reached end of stack frame. */
9666 frame->stack_pointer_offset = offset;
9668 /* Size prologue needs to allocate. */
9669 to_allocate = offset - frame->sse_reg_save_offset;
9671 if ((!to_allocate && frame->nregs <= 1)
9672 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
9673 frame->save_regs_using_mov = false;
9675 if (ix86_using_red_zone ()
9676 && crtl->sp_is_unchanging
9677 && crtl->is_leaf
9678 && !ix86_current_function_calls_tls_descriptor)
9680 frame->red_zone_size = to_allocate;
9681 if (frame->save_regs_using_mov)
9682 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
9683 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
9684 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
9686 else
9687 frame->red_zone_size = 0;
9688 frame->stack_pointer_offset -= frame->red_zone_size;
9690 /* The SEH frame pointer location is near the bottom of the frame.
9691 This is enforced by the fact that the difference between the
9692 stack pointer and the frame pointer is limited to 240 bytes in
9693 the unwind data structure. */
9694 if (TARGET_SEH)
9696 HOST_WIDE_INT diff;
9698 /* If we can leave the frame pointer where it is, do so. Also, this returns
9699 the establisher frame for __builtin_frame_address (0). */
9700 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
9701 if (diff <= SEH_MAX_FRAME_SIZE
9702 && (diff > 240 || (diff & 15) != 0)
9703 && !crtl->accesses_prior_frames)
9705 /* Ideally we'd determine what portion of the local stack frame
9706 (within the constraint of the lowest 240) is most heavily used.
9707 But without that complication, simply bias the frame pointer
9708 by 128 bytes so as to maximize the amount of the local stack
9709 frame that is addressable with 8-bit offsets. */
9710 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
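/* A rough sketch of the layout computed above, all offsets measured
   downwards from the CFA, with each area present only when its size is
   non-zero:

       return address
       pushed static chain
       saved frame pointer
                                    <- hard_frame_pointer_offset (non-SEH)
       GP register save area
                                    <- reg_save_offset
       SSE register save area (16-byte aligned)
                                    <- sse_reg_save_offset
       va-arg register save area
                                    <- frame_pointer_offset
       local variables
       outgoing argument area
                                    <- stack_pointer_offset

   stack_pointer_offset is then reduced by the red zone when the red zone
   is usable, and SEH targets may re-bias hard_frame_pointer_offset
   towards the bottom of the frame to keep it within 240 bytes of the
   stack pointer.  */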
9715 /* This is semi-inlined memory_address_length, but simplified
9716 since we know that we're always dealing with reg+offset, and
9717 to avoid having to create and discard all that rtl. */
9719 static inline int
9720 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
9722 int len = 4;
9724 if (offset == 0)
9726 /* EBP and R13 cannot be encoded without an offset. */
9727 len = (regno == BP_REG || regno == R13_REG);
9729 else if (IN_RANGE (offset, -128, 127))
9730 len = 1;
9732 /* ESP and R12 must be encoded with a SIB byte. */
9733 if (regno == SP_REG || regno == R12_REG)
9734 len++;
9736 return len;
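/* Concrete values of the encoding length computed above (the count covers
   only the displacement and SIB bytes, not the shared opcode/ModRM):

       0(%rax)   -> 0          0(%rbp), 0(%r13) -> 1  (need a disp8 of 0)
       -8(%rbp)  -> 1          0(%rsp), 0(%r12) -> 1  (need a SIB byte)
       200(%rsp) -> 5  (disp32 + SIB)  */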
9739 /* Return an RTX that points to CFA_OFFSET within the stack frame.
9740 The valid base registers are taken from CFUN->MACHINE->FS. */
9742 static rtx
9743 choose_baseaddr (HOST_WIDE_INT cfa_offset)
9745 const struct machine_function *m = cfun->machine;
9746 rtx base_reg = NULL;
9747 HOST_WIDE_INT base_offset = 0;
9749 if (m->use_fast_prologue_epilogue)
9751 /* Choose the base register most likely to allow the most scheduling
9752 opportunities. Generally FP is valid throughout the function,
9753 while DRAP must be reloaded within the epilogue. But choose either
9754 over the SP due to increased encoding size. */
9756 if (m->fs.fp_valid)
9758 base_reg = hard_frame_pointer_rtx;
9759 base_offset = m->fs.fp_offset - cfa_offset;
9761 else if (m->fs.drap_valid)
9763 base_reg = crtl->drap_reg;
9764 base_offset = 0 - cfa_offset;
9766 else if (m->fs.sp_valid)
9768 base_reg = stack_pointer_rtx;
9769 base_offset = m->fs.sp_offset - cfa_offset;
9772 else
9774 HOST_WIDE_INT toffset;
9775 int len = 16, tlen;
9777 /* Choose the base register with the smallest address encoding.
9778 With a tie, choose FP > DRAP > SP. */
9779 if (m->fs.sp_valid)
9781 base_reg = stack_pointer_rtx;
9782 base_offset = m->fs.sp_offset - cfa_offset;
9783 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
9785 if (m->fs.drap_valid)
9787 toffset = 0 - cfa_offset;
9788 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
9789 if (tlen <= len)
9791 base_reg = crtl->drap_reg;
9792 base_offset = toffset;
9793 len = tlen;
9796 if (m->fs.fp_valid)
9798 toffset = m->fs.fp_offset - cfa_offset;
9799 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
9800 if (tlen <= len)
9802 base_reg = hard_frame_pointer_rtx;
9803 base_offset = toffset;
9804 len = tlen;
9808 gcc_assert (base_reg != NULL);
9810 return plus_constant (Pmode, base_reg, base_offset);
9813 /* Emit code to save registers in the prologue. */
9815 static void
9816 ix86_emit_save_regs (void)
9818 unsigned int regno;
9819 rtx insn;
9821 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
9822 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9824 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
9825 RTX_FRAME_RELATED_P (insn) = 1;
9829 /* Emit a single register save at CFA - CFA_OFFSET. */
9831 static void
9832 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
9833 HOST_WIDE_INT cfa_offset)
9835 struct machine_function *m = cfun->machine;
9836 rtx reg = gen_rtx_REG (mode, regno);
9837 rtx mem, addr, base, insn;
9839 addr = choose_baseaddr (cfa_offset);
9840 mem = gen_frame_mem (mode, addr);
9842 /* For SSE saves, we need to indicate the 128-bit alignment. */
9843 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
9845 insn = emit_move_insn (mem, reg);
9846 RTX_FRAME_RELATED_P (insn) = 1;
9848 base = addr;
9849 if (GET_CODE (base) == PLUS)
9850 base = XEXP (base, 0);
9851 gcc_checking_assert (REG_P (base));
9853 /* When saving registers into a re-aligned local stack frame, avoid
9854 any tricky guessing by dwarf2out. */
9855 if (m->fs.realigned)
9857 gcc_checking_assert (stack_realign_drap);
9859 if (regno == REGNO (crtl->drap_reg))
9861 /* A bit of a hack. We force the DRAP register to be saved in
9862 the re-aligned stack frame, which provides us with a copy
9863 of the CFA that will last past the prologue. Install it. */
9864 gcc_checking_assert (cfun->machine->fs.fp_valid);
9865 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
9866 cfun->machine->fs.fp_offset - cfa_offset);
9867 mem = gen_rtx_MEM (mode, addr);
9868 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
9870 else
9872 /* The frame pointer is a stable reference within the
9873 aligned frame. Use it. */
9874 gcc_checking_assert (cfun->machine->fs.fp_valid);
9875 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
9876 cfun->machine->fs.fp_offset - cfa_offset);
9877 mem = gen_rtx_MEM (mode, addr);
9878 add_reg_note (insn, REG_CFA_EXPRESSION,
9879 gen_rtx_SET (VOIDmode, mem, reg));
9883 /* The memory may not be relative to the current CFA register,
9884 which means that we may need to generate a new pattern for
9885 use by the unwind info. */
9886 else if (base != m->fs.cfa_reg)
9888 addr = plus_constant (Pmode, m->fs.cfa_reg,
9889 m->fs.cfa_offset - cfa_offset);
9890 mem = gen_rtx_MEM (mode, addr);
9891 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
9895 /* Emit code to save registers using MOV insns.
9896 First register is stored at CFA - CFA_OFFSET. */
9897 static void
9898 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
9900 unsigned int regno;
9902 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9903 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9905 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
9906 cfa_offset -= UNITS_PER_WORD;
9910 /* Emit code to save SSE registers using MOV insns.
9911 First register is stored at CFA - CFA_OFFSET. */
9912 static void
9913 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
9915 unsigned int regno;
9917 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9918 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9920 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
9921 cfa_offset -= 16;
9925 static GTY(()) rtx queued_cfa_restores;
9927 /* Add a REG_CFA_RESTORE REG note to INSN or queue it until the next stack
9928 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
9929 Don't add the note if the previously saved value will be left untouched
9930 within the stack red zone until return, as unwinders can find the same value
9931 in the register and on the stack. */
9933 static void
9934 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
9936 if (!crtl->shrink_wrapped
9937 && cfa_offset <= cfun->machine->fs.red_zone_offset)
9938 return;
9940 if (insn)
9942 add_reg_note (insn, REG_CFA_RESTORE, reg);
9943 RTX_FRAME_RELATED_P (insn) = 1;
9945 else
9946 queued_cfa_restores
9947 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
9950 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
9952 static void
9953 ix86_add_queued_cfa_restore_notes (rtx insn)
9955 rtx last;
9956 if (!queued_cfa_restores)
9957 return;
9958 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
9960 XEXP (last, 1) = REG_NOTES (insn);
9961 REG_NOTES (insn) = queued_cfa_restores;
9962 queued_cfa_restores = NULL_RTX;
9963 RTX_FRAME_RELATED_P (insn) = 1;
9966 /* Expand prologue or epilogue stack adjustment.
9967 The pattern exists to put a dependency on all ebp-based memory accesses.
9968 STYLE should be negative if instructions should be marked as frame related,
9969 zero if the %r11 register is live and cannot be freely used, and positive
9970 otherwise. */
9972 static void
9973 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
9974 int style, bool set_cfa)
9976 struct machine_function *m = cfun->machine;
9977 rtx insn;
9978 bool add_frame_related_expr = false;
9980 if (Pmode == SImode)
9981 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
9982 else if (x86_64_immediate_operand (offset, DImode))
9983 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
9984 else
9986 rtx tmp;
9987 /* r11 is used by indirect sibcall return as well, set before the
9988 epilogue and used after the epilogue. */
9989 if (style)
9990 tmp = gen_rtx_REG (DImode, R11_REG);
9991 else
9993 gcc_assert (src != hard_frame_pointer_rtx
9994 && dest != hard_frame_pointer_rtx);
9995 tmp = hard_frame_pointer_rtx;
9997 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
9998 if (style < 0)
9999 add_frame_related_expr = true;
10001 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
10004 insn = emit_insn (insn);
10005 if (style >= 0)
10006 ix86_add_queued_cfa_restore_notes (insn);
10008 if (set_cfa)
10010 rtx r;
10012 gcc_assert (m->fs.cfa_reg == src);
10013 m->fs.cfa_offset += INTVAL (offset);
10014 m->fs.cfa_reg = dest;
10016 r = gen_rtx_PLUS (Pmode, src, offset);
10017 r = gen_rtx_SET (VOIDmode, dest, r);
10018 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
10019 RTX_FRAME_RELATED_P (insn) = 1;
10021 else if (style < 0)
10023 RTX_FRAME_RELATED_P (insn) = 1;
10024 if (add_frame_related_expr)
10026 rtx r = gen_rtx_PLUS (Pmode, src, offset);
10027 r = gen_rtx_SET (VOIDmode, dest, r);
10028 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
10032 if (dest == stack_pointer_rtx)
10034 HOST_WIDE_INT ooffset = m->fs.sp_offset;
10035 bool valid = m->fs.sp_valid;
10037 if (src == hard_frame_pointer_rtx)
10039 valid = m->fs.fp_valid;
10040 ooffset = m->fs.fp_offset;
10042 else if (src == crtl->drap_reg)
10044 valid = m->fs.drap_valid;
10045 ooffset = 0;
10047 else
10049 /* Else there are two possibilities: SP itself, which we set
10050 up as the default above. Or EH_RETURN_STACKADJ_RTX, which is
10051 taken care of by hand along the eh_return path. */
10052 gcc_checking_assert (src == stack_pointer_rtx
10053 || offset == const0_rtx);
10056 m->fs.sp_offset = ooffset - INTVAL (offset);
10057 m->fs.sp_valid = valid;
10061 /* Find an available register to be used as the dynamic realign argument
10062 pointer register. Such a register will be written in the prologue and
10063 used at the beginning of the body, so it must not be
10064 1. a parameter passing register.
10065 2. the GOT pointer.
10066 We reuse the static chain register if it is available. Otherwise, we
10067 use DI for i386 and R13 for x86-64. We chose R13 since it has a
10068 shorter encoding.
10070 Return: the regno of the chosen register. */
10072 static unsigned int
10073 find_drap_reg (void)
10075 tree decl = cfun->decl;
10077 if (TARGET_64BIT)
10079 /* Use R13 for a nested function or a function needing a static chain.
10080 Since a function with a tail call may use any caller-saved
10081 registers in the epilogue, DRAP must not use a caller-saved
10082 register in such a case. */
10083 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10084 return R13_REG;
10086 return R10_REG;
10088 else
10090 /* Use DI for a nested function or a function needing a static chain.
10091 Since a function with a tail call may use any caller-saved
10092 registers in the epilogue, DRAP must not use a caller-saved
10093 register in such a case. */
10094 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10095 return DI_REG;
10097 /* Reuse the static chain register if it isn't used for parameter
10098 passing. */
10099 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
10101 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
10102 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
10103 return CX_REG;
10105 return DI_REG;
10109 /* Return minimum incoming stack alignment. */
10111 static unsigned int
10112 ix86_minimum_incoming_stack_boundary (bool sibcall)
10114 unsigned int incoming_stack_boundary;
10116 /* Prefer the one specified at command line. */
10117 if (ix86_user_incoming_stack_boundary)
10118 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
10119 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
10120 if -mstackrealign is used, this isn't a sibcall check, and the
10121 estimated stack alignment is 128 bits. */
10122 else if (!sibcall
10123 && !TARGET_64BIT
10124 && ix86_force_align_arg_pointer
10125 && crtl->stack_alignment_estimated == 128)
10126 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10127 else
10128 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
10130 /* Incoming stack alignment can be changed on individual functions
10131 via force_align_arg_pointer attribute. We use the smallest
10132 incoming stack boundary. */
10133 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
10134 && lookup_attribute (ix86_force_align_arg_pointer_string,
10135 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
10136 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10138 /* The incoming stack frame has to be aligned at least at
10139 parm_stack_boundary. */
10140 if (incoming_stack_boundary < crtl->parm_stack_boundary)
10141 incoming_stack_boundary = crtl->parm_stack_boundary;
10143 /* The stack at the entry of main is aligned by the runtime. We use the
10144 smallest incoming stack boundary. */
10145 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
10146 && DECL_NAME (current_function_decl)
10147 && MAIN_NAME_P (DECL_NAME (current_function_decl))
10148 && DECL_FILE_SCOPE_P (current_function_decl))
10149 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
10151 return incoming_stack_boundary;
10154 /* Update incoming stack boundary and estimated stack alignment. */
10156 static void
10157 ix86_update_stack_boundary (void)
10159 ix86_incoming_stack_boundary
10160 = ix86_minimum_incoming_stack_boundary (false);
10162 /* x86_64 varargs functions need 16-byte stack alignment for the register
10163 save area. */
10164 if (TARGET_64BIT
10165 && cfun->stdarg
10166 && crtl->stack_alignment_estimated < 128)
10167 crtl->stack_alignment_estimated = 128;
10170 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
10171 needed or an rtx for DRAP otherwise. */
10173 static rtx
10174 ix86_get_drap_rtx (void)
10176 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
10177 crtl->need_drap = true;
10179 if (stack_realign_drap)
10181 /* Assign DRAP to vDRAP and return vDRAP. */
10182 unsigned int regno = find_drap_reg ();
10183 rtx drap_vreg;
10184 rtx arg_ptr;
10185 rtx seq, insn;
10187 arg_ptr = gen_rtx_REG (Pmode, regno);
10188 crtl->drap_reg = arg_ptr;
10190 start_sequence ();
10191 drap_vreg = copy_to_reg (arg_ptr);
10192 seq = get_insns ();
10193 end_sequence ();
10195 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
10196 if (!optimize)
10198 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
10199 RTX_FRAME_RELATED_P (insn) = 1;
10201 return drap_vreg;
10203 else
10204 return NULL;
10207 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
10209 static rtx
10210 ix86_internal_arg_pointer (void)
10212 return virtual_incoming_args_rtx;
10215 struct scratch_reg {
10216 rtx reg;
10217 bool saved;
10220 /* Return a short-lived scratch register for use on function entry.
10221 In 32-bit mode, it is valid only after the registers are saved
10222 in the prologue. This register must be released by means of
10223 release_scratch_register_on_entry once it is dead. */
10225 static void
10226 get_scratch_register_on_entry (struct scratch_reg *sr)
10228 int regno;
10230 sr->saved = false;
10232 if (TARGET_64BIT)
10234 /* We always use R11 in 64-bit mode. */
10235 regno = R11_REG;
10237 else
10239 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
10240 bool fastcall_p
10241 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10242 bool thiscall_p
10243 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10244 bool static_chain_p = DECL_STATIC_CHAIN (decl);
10245 int regparm = ix86_function_regparm (fntype, decl);
10246 int drap_regno
10247 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
10249 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
10250 for the static chain register. */
10251 if ((regparm < 1 || (fastcall_p && !static_chain_p))
10252 && drap_regno != AX_REG)
10253 regno = AX_REG;
10254 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
10255 for the static chain register. */
10256 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
10257 regno = AX_REG;
10258 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
10259 regno = DX_REG;
10260 /* ecx is the static chain register. */
10261 else if (regparm < 3 && !fastcall_p && !thiscall_p
10262 && !static_chain_p
10263 && drap_regno != CX_REG)
10264 regno = CX_REG;
10265 else if (ix86_save_reg (BX_REG, true))
10266 regno = BX_REG;
10267 /* esi is the static chain register. */
10268 else if (!(regparm == 3 && static_chain_p)
10269 && ix86_save_reg (SI_REG, true))
10270 regno = SI_REG;
10271 else if (ix86_save_reg (DI_REG, true))
10272 regno = DI_REG;
10273 else
10275 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
10276 sr->saved = true;
10280 sr->reg = gen_rtx_REG (Pmode, regno);
10281 if (sr->saved)
10283 rtx insn = emit_insn (gen_push (sr->reg));
10284 RTX_FRAME_RELATED_P (insn) = 1;
10288 /* Release a scratch register obtained from the preceding function. */
10290 static void
10291 release_scratch_register_on_entry (struct scratch_reg *sr)
10293 if (sr->saved)
10295 struct machine_function *m = cfun->machine;
10296 rtx x, insn = emit_insn (gen_pop (sr->reg));
10298 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
10299 RTX_FRAME_RELATED_P (insn) = 1;
10300 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
10301 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10302 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
10303 m->fs.sp_offset -= UNITS_PER_WORD;
10307 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
10309 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
10311 static void
10312 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
10314 /* We skip the probe for the first interval + a small dope of 4 words and
10315 probe that many bytes past the specified size to maintain a protection
10316 area at the bottom of the stack. */
10317 const int dope = 4 * UNITS_PER_WORD;
10318 rtx size_rtx = GEN_INT (size), last;
10320 /* See if we have a constant small number of probes to generate. If so,
10321 that's the easy case. The run-time loop is made up of 11 insns in the
10322 generic case while the compile-time loop is made up of 3+2*(n-1) insns
10323 for n # of intervals. */
10324 if (size <= 5 * PROBE_INTERVAL)
10326 HOST_WIDE_INT i, adjust;
10327 bool first_probe = true;
10329 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
10330 values of N from 1 until it exceeds SIZE. If only one probe is
10331 needed, this will not generate any code. Then adjust and probe
10332 to PROBE_INTERVAL + SIZE. */
10333 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10335 if (first_probe)
10337 adjust = 2 * PROBE_INTERVAL + dope;
10338 first_probe = false;
10340 else
10341 adjust = PROBE_INTERVAL;
10343 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10344 plus_constant (Pmode, stack_pointer_rtx,
10345 -adjust)));
10346 emit_stack_probe (stack_pointer_rtx);
10349 if (first_probe)
10350 adjust = size + PROBE_INTERVAL + dope;
10351 else
10352 adjust = size + PROBE_INTERVAL - i;
10354 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10355 plus_constant (Pmode, stack_pointer_rtx,
10356 -adjust)));
10357 emit_stack_probe (stack_pointer_rtx);
10359 /* Adjust back to account for the additional first interval. */
10360 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10361 plus_constant (Pmode, stack_pointer_rtx,
10362 PROBE_INTERVAL + dope)));
10365 /* Otherwise, do the same as above, but in a loop. Note that we must be
10366 extra careful with variables wrapping around because we might be at
10367 the very top (or the very bottom) of the address space and we have
10368 to be able to handle this case properly; in particular, we use an
10369 equality test for the loop condition. */
10370 else
10372 HOST_WIDE_INT rounded_size;
10373 struct scratch_reg sr;
10375 get_scratch_register_on_entry (&sr);
10378 /* Step 1: round SIZE to the previous multiple of the interval. */
10380 rounded_size = size & -PROBE_INTERVAL;
10383 /* Step 2: compute initial and final value of the loop counter. */
10385 /* SP = SP_0 + PROBE_INTERVAL. */
10386 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10387 plus_constant (Pmode, stack_pointer_rtx,
10388 - (PROBE_INTERVAL + dope))));
10390 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
10391 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
10392 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
10393 gen_rtx_PLUS (Pmode, sr.reg,
10394 stack_pointer_rtx)));
10397 /* Step 3: the loop
10399 while (SP != LAST_ADDR)
10401 SP = SP + PROBE_INTERVAL
10402 probe at SP
10405 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
10406 values of N from 1 until it is equal to ROUNDED_SIZE. */
10408 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
10411 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
10412 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
10414 if (size != rounded_size)
10416 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10417 plus_constant (Pmode, stack_pointer_rtx,
10418 rounded_size - size)));
10419 emit_stack_probe (stack_pointer_rtx);
10422 /* Adjust back to account for the additional first interval. */
10423 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10424 plus_constant (Pmode, stack_pointer_rtx,
10425 PROBE_INTERVAL + dope)));
10427 release_scratch_register_on_entry (&sr);
10430 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
10432 /* Even if the stack pointer isn't the CFA register, we need to correctly
10433 describe the adjustments made to it, in particular differentiate the
10434 frame-related ones from the frame-unrelated ones. */
10435 if (size > 0)
10437 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
10438 XVECEXP (expr, 0, 0)
10439 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10440 plus_constant (Pmode, stack_pointer_rtx, -size));
10441 XVECEXP (expr, 0, 1)
10442 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10443 plus_constant (Pmode, stack_pointer_rtx,
10444 PROBE_INTERVAL + dope + size));
10445 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
10446 RTX_FRAME_RELATED_P (last) = 1;
10448 cfun->machine->fs.sp_offset += size;
10451 /* Make sure nothing is scheduled before we are done. */
10452 emit_insn (gen_blockage ());
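/* A worked example of the unrolled branch above, assuming
   PROBE_INTERVAL == 4096 and a 64-bit target (so dope == 32), for
   size == 10000:

       sub  $8224, %rsp ; probe      2 * 4096 + 32
       sub  $4096, %rsp ; probe
       sub  $1808, %rsp ; probe      10000 + 4096 - 12288
       add  $4128, %rsp              4096 + 32, the extra first interval

   a net adjustment of exactly 10000 bytes, with every PROBE_INTERVAL of
   the new allocation touched.  */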
10455 /* Adjust the stack pointer up to REG while probing it. */
10457 const char *
10458 output_adjust_stack_and_probe (rtx reg)
10460 static int labelno = 0;
10461 char loop_lab[32], end_lab[32];
10462 rtx xops[2];
10464 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10465 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10467 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10469 /* Jump to END_LAB if SP == LAST_ADDR. */
10470 xops[0] = stack_pointer_rtx;
10471 xops[1] = reg;
10472 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10473 fputs ("\tje\t", asm_out_file);
10474 assemble_name_raw (asm_out_file, end_lab);
10475 fputc ('\n', asm_out_file);
10477 /* SP = SP + PROBE_INTERVAL. */
10478 xops[1] = GEN_INT (PROBE_INTERVAL);
10479 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10481 /* Probe at SP. */
10482 xops[1] = const0_rtx;
10483 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
10485 fprintf (asm_out_file, "\tjmp\t");
10486 assemble_name_raw (asm_out_file, loop_lab);
10487 fputc ('\n', asm_out_file);
10489 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10491 return "";
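/* Illustrative sketch of the output, assuming PROBE_INTERVAL is 4096 and
   with %r11 standing in for REG; on x86-64 the template above prints
   roughly this AT&T-syntax loop (label names are the LPSRL/LPSRE internal
   labels generated above):

	.LPSRL0:
		cmpq	%r11, %rsp
		je	.LPSRE0
		subq	$4096, %rsp
		orq	$0, (%rsp)
		jmp	.LPSRL0
	.LPSRE0:

   The read-modify-write "or" of zero touches the freshly exposed stack
   page without changing its contents.  */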
10494 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
10495 inclusive. These are offsets from the current stack pointer. */
10497 static void
10498 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
10500 /* See if we have a constant small number of probes to generate. If so,
10501 that's the easy case. The run-time loop is made up of 7 insns in the
10502 generic case while the compile-time loop is made up of n insns for n #
10503 of intervals. */
10504 if (size <= 7 * PROBE_INTERVAL)
10506 HOST_WIDE_INT i;
10508 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
10509 it exceeds SIZE. If only one probe is needed, this will not
10510 generate any code. Then probe at FIRST + SIZE. */
10511 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10512 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10513 -(first + i)));
10515 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10516 -(first + size)));
10519 /* Otherwise, do the same as above, but in a loop. Note that we must be
10520 extra careful with variables wrapping around because we might be at
10521 the very top (or the very bottom) of the address space and we have
10522 to be able to handle this case properly; in particular, we use an
10523 equality test for the loop condition. */
10524 else
10526 HOST_WIDE_INT rounded_size, last;
10527 struct scratch_reg sr;
10529 get_scratch_register_on_entry (&sr);
10532 /* Step 1: round SIZE to the previous multiple of the interval. */
10534 rounded_size = size & -PROBE_INTERVAL;
10537 /* Step 2: compute initial and final value of the loop counter. */
10539 /* TEST_OFFSET = FIRST. */
10540 emit_move_insn (sr.reg, GEN_INT (-first));
10542 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
10543 last = first + rounded_size;
10546 /* Step 3: the loop
10548 while (TEST_ADDR != LAST_ADDR)
10550 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
10551 probe at TEST_ADDR
10554 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
10555 until it is equal to ROUNDED_SIZE. */
10557 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
10560 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
10561 that SIZE is equal to ROUNDED_SIZE. */
10563 if (size != rounded_size)
10564 emit_stack_probe (plus_constant (Pmode,
10565 gen_rtx_PLUS (Pmode,
10566 stack_pointer_rtx,
10567 sr.reg),
10568 rounded_size - size));
10570 release_scratch_register_on_entry (&sr);
10573 /* Make sure nothing is scheduled before we are done. */
10574 emit_insn (gen_blockage ());
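/* Illustrative worked example, again assuming PROBE_INTERVAL is 4096: a
   call with FIRST = 8192 and SIZE = 10000 takes the short path above and
   emits three probes, at sp - 12288, sp - 16384 and finally sp - 18192
   (= FIRST + SIZE).  */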
10577 /* Probe a range of stack addresses from REG to END, inclusive. These are
10578 offsets from the current stack pointer. */
10580 const char *
10581 output_probe_stack_range (rtx reg, rtx end)
10583 static int labelno = 0;
10584 char loop_lab[32], end_lab[32];
10585 rtx xops[3];
10587 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10588 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10590 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10592 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
10593 xops[0] = reg;
10594 xops[1] = end;
10595 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10596 fputs ("\tje\t", asm_out_file);
10597 assemble_name_raw (asm_out_file, end_lab);
10598 fputc ('\n', asm_out_file);
10600 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
10601 xops[1] = GEN_INT (PROBE_INTERVAL);
10602 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10604 /* Probe at TEST_ADDR. */
10605 xops[0] = stack_pointer_rtx;
10606 xops[1] = reg;
10607 xops[2] = const0_rtx;
10608 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
10610 fprintf (asm_out_file, "\tjmp\t");
10611 assemble_name_raw (asm_out_file, loop_lab);
10612 fputc ('\n', asm_out_file);
10614 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10616 return "";
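/* Illustrative sketch of the output, assuming PROBE_INTERVAL is 4096,
   with %r11 standing in for REG and -24576 for END; on x86-64 the
   template above prints roughly this AT&T-syntax loop:

	.LPSRL1:
		cmpq	$-24576, %r11
		je	.LPSRE1
		subq	$4096, %r11
		orq	$0, (%rsp,%r11)
		jmp	.LPSRL1
	.LPSRE1:

   REG holds a negative offset from the stack pointer, so each probe
   lands PROBE_INTERVAL bytes further below SP than the previous one.  */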
10619 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
10620 to be generated in correct form. */
10621 static void
10622 ix86_finalize_stack_realign_flags (void)
10624 /* Check if stack realign is really needed after reload, and
10625 store the result in cfun. */
10626 unsigned int incoming_stack_boundary
10627 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
10628 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
10629 unsigned int stack_realign = (incoming_stack_boundary
10630 < (crtl->is_leaf
10631 ? crtl->max_used_stack_slot_alignment
10632 : crtl->stack_alignment_needed));
10634 if (crtl->stack_realign_finalized)
10636 /* After stack_realign_needed is finalized, we can no longer
10637 change it. */
10638 gcc_assert (crtl->stack_realign_needed == stack_realign);
10639 return;
10642 /* If the only reason for frame_pointer_needed is that we conservatively
10643 assumed stack realignment might be needed, but in the end nothing that
10644 needed the stack alignment had been spilled, clear frame_pointer_needed
10645 and say we don't need stack realignment. */
10646 if (stack_realign
10647 && frame_pointer_needed
10648 && crtl->is_leaf
10649 && flag_omit_frame_pointer
10650 && crtl->sp_is_unchanging
10651 && !ix86_current_function_calls_tls_descriptor
10652 && !crtl->accesses_prior_frames
10653 && !cfun->calls_alloca
10654 && !crtl->calls_eh_return
10655 && !(flag_stack_check && STACK_CHECK_MOVING_SP)
10656 && !ix86_frame_pointer_required ()
10657 && get_frame_size () == 0
10658 && ix86_nsaved_sseregs () == 0
10659 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
10661 HARD_REG_SET set_up_by_prologue, prologue_used;
10662 basic_block bb;
10664 CLEAR_HARD_REG_SET (prologue_used);
10665 CLEAR_HARD_REG_SET (set_up_by_prologue);
10666 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
10667 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
10668 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
10669 HARD_FRAME_POINTER_REGNUM);
10670 FOR_EACH_BB_FN (bb, cfun)
10672 rtx insn;
10673 FOR_BB_INSNS (bb, insn)
10674 if (NONDEBUG_INSN_P (insn)
10675 && requires_stack_frame_p (insn, prologue_used,
10676 set_up_by_prologue))
10678 crtl->stack_realign_needed = stack_realign;
10679 crtl->stack_realign_finalized = true;
10680 return;
10684 /* If drap has been set, but it actually isn't live at the start
10685 of the function, there is no reason to set it up. */
10686 if (crtl->drap_reg)
10688 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
10689 if (! REGNO_REG_SET_P (DF_LR_IN (bb), REGNO (crtl->drap_reg)))
10691 crtl->drap_reg = NULL_RTX;
10692 crtl->need_drap = false;
10695 else
10696 cfun->machine->no_drap_save_restore = true;
10698 frame_pointer_needed = false;
10699 stack_realign = false;
10700 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
10701 crtl->stack_alignment_needed = incoming_stack_boundary;
10702 crtl->stack_alignment_estimated = incoming_stack_boundary;
10703 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
10704 crtl->preferred_stack_boundary = incoming_stack_boundary;
10705 df_finish_pass (true);
10706 df_scan_alloc (NULL);
10707 df_scan_blocks ();
10708 df_compute_regs_ever_live (true);
10709 df_analyze ();
10712 crtl->stack_realign_needed = stack_realign;
10713 crtl->stack_realign_finalized = true;
10716 /* Expand the prologue into a bunch of separate insns. */
10718 void
10719 ix86_expand_prologue (void)
10721 struct machine_function *m = cfun->machine;
10722 rtx insn, t;
10723 bool pic_reg_used;
10724 struct ix86_frame frame;
10725 HOST_WIDE_INT allocate;
10726 bool int_registers_saved;
10727 bool sse_registers_saved;
10729 ix86_finalize_stack_realign_flags ();
10731 /* DRAP should not coexist with stack_realign_fp */
10732 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
10734 memset (&m->fs, 0, sizeof (m->fs));
10736 /* Initialize CFA state for before the prologue. */
10737 m->fs.cfa_reg = stack_pointer_rtx;
10738 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
10740 /* Track SP offset to the CFA. We continue tracking this after we've
10741 swapped the CFA register away from SP. In the case of re-alignment
10742 this is fudged; we're interested in offsets within the local frame. */
10743 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10744 m->fs.sp_valid = true;
10746 ix86_compute_frame_layout (&frame);
10748 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
10750 /* We should have already generated an error for any use of
10751 ms_hook on a nested function. */
10752 gcc_checking_assert (!ix86_static_chain_on_stack);
10754 /* Check if profiling is active and we shall use the profiling-before-
10755 prologue variant. If so, issue a sorry. */
10756 if (crtl->profile && flag_fentry != 0)
10757 sorry ("ms_hook_prologue attribute isn%'t compatible "
10758 "with -mfentry for 32-bit");
10760 /* In ix86_asm_output_function_label we emitted:
10761 8b ff movl.s %edi,%edi
10762 55 push %ebp
10763 8b ec movl.s %esp,%ebp
10765 This matches the hookable function prologue in Win32 API
10766 functions in Microsoft Windows XP Service Pack 2 and newer.
10767 Wine uses this to enable Windows apps to hook the Win32 API
10768 functions provided by Wine.
10770 What that means is that we've already set up the frame pointer. */
10772 if (frame_pointer_needed
10773 && !(crtl->drap_reg && crtl->stack_realign_needed))
10775 rtx push, mov;
10777 /* We've decided to use the frame pointer already set up.
10778 Describe this to the unwinder by pretending that both
10779 push and mov insns happen right here.
10781 Putting the unwind info here at the end of the ms_hook
10782 is done so that we can make absolutely certain we get
10783 the required byte sequence at the start of the function,
10784 rather than relying on an assembler that can produce
10785 the exact encoding required.
10787 However, it does mean (in the unpatched case) that we have
10788 a 1-insn window where the asynchronous unwind info is
10789 incorrect. On the other hand, if we placed the unwind info at
10790 its correct location we would have incorrect unwind info
10791 in the patched case. That is probably all moot, since
10792 I don't expect Wine to generate dwarf2 unwind info for the
10793 system libraries that use this feature. */
10795 insn = emit_insn (gen_blockage ());
10797 push = gen_push (hard_frame_pointer_rtx);
10798 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
10799 stack_pointer_rtx);
10800 RTX_FRAME_RELATED_P (push) = 1;
10801 RTX_FRAME_RELATED_P (mov) = 1;
10803 RTX_FRAME_RELATED_P (insn) = 1;
10804 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10805 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
10807 /* Note that gen_push incremented m->fs.cfa_offset, even
10808 though we didn't emit the push insn here. */
10809 m->fs.cfa_reg = hard_frame_pointer_rtx;
10810 m->fs.fp_offset = m->fs.cfa_offset;
10811 m->fs.fp_valid = true;
10813 else
10815 /* The frame pointer is not needed so pop %ebp again.
10816 This leaves us with a pristine state. */
10817 emit_insn (gen_pop (hard_frame_pointer_rtx));
10821 /* The first insn of a function that accepts its static chain on the
10822 stack is to push the register that would be filled in by a direct
10823 call. This insn will be skipped by the trampoline. */
10824 else if (ix86_static_chain_on_stack)
10826 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
10827 emit_insn (gen_blockage ());
10829 /* We don't want to interpret this push insn as a register save,
10830 only as a stack adjustment. The real copy of the register as
10831 a save will be done later, if needed. */
10832 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
10833 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
10834 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
10835 RTX_FRAME_RELATED_P (insn) = 1;
10838 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
10839 DRAP is needed and stack realignment is really needed after reload. */
10840 if (stack_realign_drap)
10842 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10844 /* Only need to push the parameter pointer reg if it is call-saved. */
10845 if (!call_used_regs[REGNO (crtl->drap_reg)])
10847 /* Push arg pointer reg */
10848 insn = emit_insn (gen_push (crtl->drap_reg));
10849 RTX_FRAME_RELATED_P (insn) = 1;
10852 /* Grab the argument pointer. */
10853 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
10854 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10855 RTX_FRAME_RELATED_P (insn) = 1;
10856 m->fs.cfa_reg = crtl->drap_reg;
10857 m->fs.cfa_offset = 0;
10859 /* Align the stack. */
10860 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10861 stack_pointer_rtx,
10862 GEN_INT (-align_bytes)));
10863 RTX_FRAME_RELATED_P (insn) = 1;
10865 /* Replicate the return address on the stack so that return
10866 address can be reached via (argp - 1) slot. This is needed
10867 to implement macro RETURN_ADDR_RTX and intrinsic function
10868 expand_builtin_return_addr etc. */
10869 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
10870 t = gen_frame_mem (word_mode, t);
10871 insn = emit_insn (gen_push (t));
10872 RTX_FRAME_RELATED_P (insn) = 1;
10874 /* For the purposes of frame and register save area addressing,
10875 we've started over with a new frame. */
10876 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10877 m->fs.realigned = true;
10880 int_registers_saved = (frame.nregs == 0);
10881 sse_registers_saved = (frame.nsseregs == 0);
10883 if (frame_pointer_needed && !m->fs.fp_valid)
10885 /* Note: AT&T enter does NOT have reversed args. Enter is probably
10886 slower on all targets. Also sdb doesn't like it. */
10887 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
10888 RTX_FRAME_RELATED_P (insn) = 1;
10890 /* Push registers now, before setting the frame pointer
10891 on SEH target. */
10892 if (!int_registers_saved
10893 && TARGET_SEH
10894 && !frame.save_regs_using_mov)
10896 ix86_emit_save_regs ();
10897 int_registers_saved = true;
10898 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10901 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
10903 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
10904 RTX_FRAME_RELATED_P (insn) = 1;
10906 if (m->fs.cfa_reg == stack_pointer_rtx)
10907 m->fs.cfa_reg = hard_frame_pointer_rtx;
10908 m->fs.fp_offset = m->fs.sp_offset;
10909 m->fs.fp_valid = true;
10913 if (!int_registers_saved)
10915 /* If saving registers via PUSH, do so now. */
10916 if (!frame.save_regs_using_mov)
10918 ix86_emit_save_regs ();
10919 int_registers_saved = true;
10920 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10923 /* When using the red zone we may start register saving before allocating
10924 the stack frame, saving one cycle of the prologue. However, avoid
10925 doing this if we have to probe the stack; at least on x86_64 the
10926 stack probe can turn into a call that clobbers a red zone location. */
10927 else if (ix86_using_red_zone ()
10928 && (! TARGET_STACK_PROBE
10929 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
10931 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10932 int_registers_saved = true;
10936 if (stack_realign_fp)
10938 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10939 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
10941 /* The computation of the size of the re-aligned stack frame means
10942 that we must allocate the size of the register save area before
10943 performing the actual alignment. Otherwise we cannot guarantee
10944 that there's enough storage above the realignment point. */
10945 if (m->fs.sp_offset != frame.sse_reg_save_offset)
10946 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10947 GEN_INT (m->fs.sp_offset
10948 - frame.sse_reg_save_offset),
10949 -1, false);
10951 /* Align the stack. */
10952 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10953 stack_pointer_rtx,
10954 GEN_INT (-align_bytes)));
10956 /* For the purposes of register save area addressing, the stack
10957 pointer is no longer valid. As for the value of sp_offset,
10958 see ix86_compute_frame_layout, which we need to match in order
10959 to pass verification of stack_pointer_offset at the end. */
10960 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
10961 m->fs.sp_valid = false;
10964 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
10966 if (flag_stack_usage_info)
10968 /* We start to count from ARG_POINTER. */
10969 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
10971 /* If it was realigned, take into account the fake frame. */
10972 if (stack_realign_drap)
10974 if (ix86_static_chain_on_stack)
10975 stack_size += UNITS_PER_WORD;
10977 if (!call_used_regs[REGNO (crtl->drap_reg)])
10978 stack_size += UNITS_PER_WORD;
10980 /* This over-estimates by 1 minimal-stack-alignment-unit but
10981 mitigates that by counting in the new return address slot. */
10982 current_function_dynamic_stack_size
10983 += crtl->stack_alignment_needed / BITS_PER_UNIT;
10986 current_function_static_stack_size = stack_size;
10989 /* On SEH target with very large frame size, allocate an area to save
10990 SSE registers (as the very large allocation won't be described). */
10991 if (TARGET_SEH
10992 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
10993 && !sse_registers_saved)
10995 HOST_WIDE_INT sse_size =
10996 frame.sse_reg_save_offset - frame.reg_save_offset;
10998 gcc_assert (int_registers_saved);
11000 /* No need to do stack checking as the area will be immediately
11001 written. */
11002 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11003 GEN_INT (-sse_size), -1,
11004 m->fs.cfa_reg == stack_pointer_rtx);
11005 allocate -= sse_size;
11006 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11007 sse_registers_saved = true;
11010 /* The stack has already been decremented by the instruction calling us,
11011 so probe if the size is non-negative to preserve the protection area. */
11012 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
11014 /* We expect the registers to be saved when probes are used. */
11015 gcc_assert (int_registers_saved);
11017 if (STACK_CHECK_MOVING_SP)
11019 if (!(crtl->is_leaf && !cfun->calls_alloca
11020 && allocate <= PROBE_INTERVAL))
11022 ix86_adjust_stack_and_probe (allocate);
11023 allocate = 0;
11026 else
11028 HOST_WIDE_INT size = allocate;
11030 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
11031 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
11033 if (TARGET_STACK_PROBE)
11035 if (crtl->is_leaf && !cfun->calls_alloca)
11037 if (size > PROBE_INTERVAL)
11038 ix86_emit_probe_stack_range (0, size);
11040 else
11041 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
11043 else
11045 if (crtl->is_leaf && !cfun->calls_alloca)
11047 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
11048 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT,
11049 size - STACK_CHECK_PROTECT);
11051 else
11052 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
11057 if (allocate == 0)
11059 else if (!ix86_target_stack_probe ()
11060 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
11062 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11063 GEN_INT (-allocate), -1,
11064 m->fs.cfa_reg == stack_pointer_rtx);
11066 else
11068 rtx eax = gen_rtx_REG (Pmode, AX_REG);
11069 rtx r10 = NULL;
11070 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
11071 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
11072 bool eax_live = ix86_eax_live_at_start_p ();
11073 bool r10_live = false;
11075 if (TARGET_64BIT)
11076 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
11078 if (eax_live)
11080 insn = emit_insn (gen_push (eax));
11081 allocate -= UNITS_PER_WORD;
11082 /* Note that SEH directives need to continue tracking the stack
11083 pointer even after the frame pointer has been set up. */
11084 if (sp_is_cfa_reg || TARGET_SEH)
11086 if (sp_is_cfa_reg)
11087 m->fs.cfa_offset += UNITS_PER_WORD;
11088 RTX_FRAME_RELATED_P (insn) = 1;
11092 if (r10_live)
11094 r10 = gen_rtx_REG (Pmode, R10_REG);
11095 insn = emit_insn (gen_push (r10));
11096 allocate -= UNITS_PER_WORD;
11097 if (sp_is_cfa_reg || TARGET_SEH)
11099 if (sp_is_cfa_reg)
11100 m->fs.cfa_offset += UNITS_PER_WORD;
11101 RTX_FRAME_RELATED_P (insn) = 1;
11105 emit_move_insn (eax, GEN_INT (allocate));
11106 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
11108 /* Use the fact that AX still contains ALLOCATE. */
11109 adjust_stack_insn = (Pmode == DImode
11110 ? gen_pro_epilogue_adjust_stack_di_sub
11111 : gen_pro_epilogue_adjust_stack_si_sub);
11113 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
11114 stack_pointer_rtx, eax));
11116 if (sp_is_cfa_reg || TARGET_SEH)
11118 if (sp_is_cfa_reg)
11119 m->fs.cfa_offset += allocate;
11120 RTX_FRAME_RELATED_P (insn) = 1;
11121 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11122 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11123 plus_constant (Pmode, stack_pointer_rtx,
11124 -allocate)));
11126 m->fs.sp_offset += allocate;
11128 /* Use stack_pointer_rtx for relative addressing so that code
11129 works for realigned stack, too. */
11130 if (r10_live && eax_live)
11132 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11133 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
11134 gen_frame_mem (word_mode, t));
11135 t = plus_constant (Pmode, t, UNITS_PER_WORD);
11136 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
11137 gen_frame_mem (word_mode, t));
11139 else if (eax_live || r10_live)
11141 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11142 emit_move_insn (gen_rtx_REG (word_mode,
11143 (eax_live ? AX_REG : R10_REG)),
11144 gen_frame_mem (word_mode, t));
11147 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
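/* Illustrative sketch of what the large-allocation path above typically
   expands to on x86-64, where the allocation worker is the target's
   stack-probing helper (for example ___chkstk_ms on SEH targets):

	mov	$ALLOCATE, %rax
	call	<allocation worker>
	subq	%rax, %rsp

   with %rax and/or %r10 pushed beforehand and reloaded from the new
   stack afterwards when they are live on function entry.  */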
11149 /* If we haven't already set up the frame pointer, do so now. */
11150 if (frame_pointer_needed && !m->fs.fp_valid)
11152 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
11153 GEN_INT (frame.stack_pointer_offset
11154 - frame.hard_frame_pointer_offset));
11155 insn = emit_insn (insn);
11156 RTX_FRAME_RELATED_P (insn) = 1;
11157 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
11159 if (m->fs.cfa_reg == stack_pointer_rtx)
11160 m->fs.cfa_reg = hard_frame_pointer_rtx;
11161 m->fs.fp_offset = frame.hard_frame_pointer_offset;
11162 m->fs.fp_valid = true;
11165 if (!int_registers_saved)
11166 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11167 if (!sse_registers_saved)
11168 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11170 pic_reg_used = false;
11171 /* We don't use pic-register for pe-coff target. */
11172 if (pic_offset_table_rtx
11173 && !TARGET_PECOFF
11174 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
11175 || crtl->profile))
11177 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
11179 if (alt_pic_reg_used != INVALID_REGNUM)
11180 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
11182 pic_reg_used = true;
11185 if (pic_reg_used)
11187 if (TARGET_64BIT)
11189 if (ix86_cmodel == CM_LARGE_PIC)
11191 rtx label, tmp_reg;
11193 gcc_assert (Pmode == DImode);
11194 label = gen_label_rtx ();
11195 emit_label (label);
11196 LABEL_PRESERVE_P (label) = 1;
11197 tmp_reg = gen_rtx_REG (Pmode, R11_REG);
11198 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
11199 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
11200 label));
11201 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
11202 insn = emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
11203 pic_offset_table_rtx, tmp_reg));
11205 else
11206 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
11208 else
11210 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
11211 RTX_FRAME_RELATED_P (insn) = 1;
11212 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
11216 /* In the pic_reg_used case, make sure that the got load isn't deleted
11217 when mcount needs it. Blockage to avoid call movement across mcount
11218 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
11219 note. */
11220 if (crtl->profile && !flag_fentry && pic_reg_used)
11221 emit_insn (gen_prologue_use (pic_offset_table_rtx));
11223 if (crtl->drap_reg && !crtl->stack_realign_needed)
11225 /* vDRAP is set up, but after reload it turns out stack realignment
11226 isn't necessary; here we emit prologue code to set up DRAP
11227 without the stack realignment adjustment. */
11228 t = choose_baseaddr (0);
11229 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
11232 /* Prevent instructions from being scheduled into register save push
11233 sequence when access to the redzone area is done through frame pointer.
11234 The offset between the frame pointer and the stack pointer is calculated
11235 relative to the value of the stack pointer at the end of the function
11236 prologue, and moving instructions that access redzone area via frame
11237 pointer inside push sequence violates this assumption. */
11238 if (frame_pointer_needed && frame.red_zone_size)
11239 emit_insn (gen_memory_blockage ());
11241 /* Emit cld instruction if stringops are used in the function. */
11242 if (TARGET_CLD && ix86_current_function_needs_cld)
11243 emit_insn (gen_cld ());
11245 /* SEH requires that the prologue end within 256 bytes of the start of
11246 the function. Prevent instruction schedules that would extend that.
11247 Further, prevent alloca modifications to the stack pointer from being
11248 combined with prologue modifications. */
11249 if (TARGET_SEH)
11250 emit_insn (gen_prologue_use (stack_pointer_rtx));
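/* For orientation, an illustrative sketch only: for an ordinary non-leaf
   x86-64 function that needs a frame pointer and saves one call-saved
   register, the machinery above reduces to the familiar sequence

	pushq	%rbp
	movq	%rsp, %rbp
	pushq	%rbx
	subq	$NN, %rsp

   where NN is the remaining local-frame allocation derived from
   frame.stack_pointer_offset; register saves may use moves instead of
   pushes depending on the frame layout chosen above.  */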
11253 /* Emit code to restore REG using a POP insn. */
11255 static void
11256 ix86_emit_restore_reg_using_pop (rtx reg)
11258 struct machine_function *m = cfun->machine;
11259 rtx insn = emit_insn (gen_pop (reg));
11261 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
11262 m->fs.sp_offset -= UNITS_PER_WORD;
11264 if (m->fs.cfa_reg == crtl->drap_reg
11265 && REGNO (reg) == REGNO (crtl->drap_reg))
11267 /* Previously we'd represented the CFA as an expression
11268 like *(%ebp - 8). We've just popped that value from
11269 the stack, which means we need to reset the CFA to
11270 the drap register. This will remain until we restore
11271 the stack pointer. */
11272 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11273 RTX_FRAME_RELATED_P (insn) = 1;
11275 /* This means that the DRAP register is valid for addressing too. */
11276 m->fs.drap_valid = true;
11277 return;
11280 if (m->fs.cfa_reg == stack_pointer_rtx)
11282 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11283 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
11284 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11285 RTX_FRAME_RELATED_P (insn) = 1;
11287 m->fs.cfa_offset -= UNITS_PER_WORD;
11290 /* When the frame pointer is the CFA, and we pop it, we are
11291 swapping back to the stack pointer as the CFA. This happens
11292 for stack frames that don't allocate other data, so we assume
11293 the stack pointer is now pointing at the return address, i.e.
11294 the function entry state, which makes the offset be 1 word. */
11295 if (reg == hard_frame_pointer_rtx)
11297 m->fs.fp_valid = false;
11298 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11300 m->fs.cfa_reg = stack_pointer_rtx;
11301 m->fs.cfa_offset -= UNITS_PER_WORD;
11303 add_reg_note (insn, REG_CFA_DEF_CFA,
11304 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11305 GEN_INT (m->fs.cfa_offset)));
11306 RTX_FRAME_RELATED_P (insn) = 1;
11311 /* Emit code to restore saved registers using POP insns. */
11313 static void
11314 ix86_emit_restore_regs_using_pop (void)
11316 unsigned int regno;
11318 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11319 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
11320 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
11323 /* Emit code and notes for the LEAVE instruction. */
11325 static void
11326 ix86_emit_leave (void)
11328 struct machine_function *m = cfun->machine;
11329 rtx insn = emit_insn (ix86_gen_leave ());
11331 ix86_add_queued_cfa_restore_notes (insn);
11333 gcc_assert (m->fs.fp_valid);
11334 m->fs.sp_valid = true;
11335 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
11336 m->fs.fp_valid = false;
11338 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11340 m->fs.cfa_reg = stack_pointer_rtx;
11341 m->fs.cfa_offset = m->fs.sp_offset;
11343 add_reg_note (insn, REG_CFA_DEF_CFA,
11344 plus_constant (Pmode, stack_pointer_rtx,
11345 m->fs.sp_offset));
11346 RTX_FRAME_RELATED_P (insn) = 1;
11348 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
11349 m->fs.fp_offset);
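/* Illustrative note: "leave" behaves like movq %rbp, %rsp followed by
   popq %rbp, which is why the code above marks SP as valid at
   fp_offset - UNITS_PER_WORD and invalidates the frame pointer in one
   step.  */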
11352 /* Emit code to restore saved registers using MOV insns.
11353 First register is restored from CFA - CFA_OFFSET. */
11354 static void
11355 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
11356 bool maybe_eh_return)
11358 struct machine_function *m = cfun->machine;
11359 unsigned int regno;
11361 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11362 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11364 rtx reg = gen_rtx_REG (word_mode, regno);
11365 rtx insn, mem;
11367 mem = choose_baseaddr (cfa_offset);
11368 mem = gen_frame_mem (word_mode, mem);
11369 insn = emit_move_insn (reg, mem);
11371 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
11373 /* Previously we'd represented the CFA as an expression
11374 like *(%ebp - 8). We've just popped that value from
11375 the stack, which means we need to reset the CFA to
11376 the drap register. This will remain until we restore
11377 the stack pointer. */
11378 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11379 RTX_FRAME_RELATED_P (insn) = 1;
11381 /* This means that the DRAP register is valid for addressing. */
11382 m->fs.drap_valid = true;
11384 else
11385 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11387 cfa_offset -= UNITS_PER_WORD;
11391 /* Emit code to restore saved SSE registers using MOV insns.
11392 First register is restored from CFA - CFA_OFFSET. */
11393 static void
11394 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
11395 bool maybe_eh_return)
11397 unsigned int regno;
11399 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11400 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11402 rtx reg = gen_rtx_REG (V4SFmode, regno);
11403 rtx mem;
11405 mem = choose_baseaddr (cfa_offset);
11406 mem = gen_rtx_MEM (V4SFmode, mem);
11407 set_mem_align (mem, 128);
11408 emit_move_insn (reg, mem);
11410 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11412 cfa_offset -= 16;
11416 /* Restore function stack, frame, and registers. */
11418 void
11419 ix86_expand_epilogue (int style)
11421 struct machine_function *m = cfun->machine;
11422 struct machine_frame_state frame_state_save = m->fs;
11423 struct ix86_frame frame;
11424 bool restore_regs_via_mov;
11425 bool using_drap;
11427 ix86_finalize_stack_realign_flags ();
11428 ix86_compute_frame_layout (&frame);
11430 m->fs.sp_valid = (!frame_pointer_needed
11431 || (crtl->sp_is_unchanging
11432 && !stack_realign_fp));
11433 gcc_assert (!m->fs.sp_valid
11434 || m->fs.sp_offset == frame.stack_pointer_offset);
11436 /* The FP must be valid if the frame pointer is present. */
11437 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
11438 gcc_assert (!m->fs.fp_valid
11439 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
11441 /* We must have *some* valid pointer to the stack frame. */
11442 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
11444 /* The DRAP is never valid at this point. */
11445 gcc_assert (!m->fs.drap_valid);
11447 /* See the comment about red zone and frame
11448 pointer usage in ix86_expand_prologue. */
11449 if (frame_pointer_needed && frame.red_zone_size)
11450 emit_insn (gen_memory_blockage ());
11452 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
11453 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
11455 /* Determine the CFA offset of the end of the red-zone. */
11456 m->fs.red_zone_offset = 0;
11457 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
11459 /* The red-zone begins below the return address. */
11460 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
11462 /* When the register save area is in the aligned portion of
11463 the stack, determine the maximum runtime displacement that
11464 matches up with the aligned frame. */
11465 if (stack_realign_drap)
11466 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
11467 + UNITS_PER_WORD);
11470 /* Special care must be taken for the normal return case of a function
11471 using eh_return: the eax and edx registers are marked as saved, but
11472 not restored along this path. Adjust the save location to match. */
11473 if (crtl->calls_eh_return && style != 2)
11474 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
11476 /* EH_RETURN requires the use of moves to function properly. */
11477 if (crtl->calls_eh_return)
11478 restore_regs_via_mov = true;
11479 /* SEH requires the use of pops to identify the epilogue. */
11480 else if (TARGET_SEH)
11481 restore_regs_via_mov = false;
11482 /* If we're only restoring one register and sp is not valid, then
11483 restore the register using a move instruction, since it's
11484 less work than reloading sp and popping the register. */
11485 else if (!m->fs.sp_valid && frame.nregs <= 1)
11486 restore_regs_via_mov = true;
11487 else if (TARGET_EPILOGUE_USING_MOVE
11488 && cfun->machine->use_fast_prologue_epilogue
11489 && (frame.nregs > 1
11490 || m->fs.sp_offset != frame.reg_save_offset))
11491 restore_regs_via_mov = true;
11492 else if (frame_pointer_needed
11493 && !frame.nregs
11494 && m->fs.sp_offset != frame.reg_save_offset)
11495 restore_regs_via_mov = true;
11496 else if (frame_pointer_needed
11497 && TARGET_USE_LEAVE
11498 && cfun->machine->use_fast_prologue_epilogue
11499 && frame.nregs == 1)
11500 restore_regs_via_mov = true;
11501 else
11502 restore_regs_via_mov = false;
11504 if (restore_regs_via_mov || frame.nsseregs)
11506 /* Ensure that the entire register save area is addressable via
11507 the stack pointer, if we will restore via sp. */
11508 if (TARGET_64BIT
11509 && m->fs.sp_offset > 0x7fffffff
11510 && !(m->fs.fp_valid || m->fs.drap_valid)
11511 && (frame.nsseregs + frame.nregs) != 0)
11513 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11514 GEN_INT (m->fs.sp_offset
11515 - frame.sse_reg_save_offset),
11516 style,
11517 m->fs.cfa_reg == stack_pointer_rtx);
11521 /* If there are any SSE registers to restore, then we have to do it
11522 via moves, since there's obviously no pop for SSE regs. */
11523 if (frame.nsseregs)
11524 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
11525 style == 2);
11527 if (restore_regs_via_mov)
11529 rtx t;
11531 if (frame.nregs)
11532 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
11534 /* eh_return epilogues need %ecx added to the stack pointer. */
11535 if (style == 2)
11537 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
11539 /* Stack align doesn't work with eh_return. */
11540 gcc_assert (!stack_realign_drap);
11541 /* Neither do regparm nested functions. */
11542 gcc_assert (!ix86_static_chain_on_stack);
11544 if (frame_pointer_needed)
11546 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
11547 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
11548 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
11550 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
11551 insn = emit_move_insn (hard_frame_pointer_rtx, t);
11553 /* Note that we use SA as a temporary CFA, as the return
11554 address is at the proper place relative to it. We
11555 pretend this happens at the FP restore insn because
11556 prior to this insn the FP would be stored at the wrong
11557 offset relative to SA, and after this insn we have no
11558 other reasonable register to use for the CFA. We don't
11559 bother resetting the CFA to the SP for the duration of
11560 the return insn. */
11561 add_reg_note (insn, REG_CFA_DEF_CFA,
11562 plus_constant (Pmode, sa, UNITS_PER_WORD));
11563 ix86_add_queued_cfa_restore_notes (insn);
11564 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
11565 RTX_FRAME_RELATED_P (insn) = 1;
11567 m->fs.cfa_reg = sa;
11568 m->fs.cfa_offset = UNITS_PER_WORD;
11569 m->fs.fp_valid = false;
11571 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
11572 const0_rtx, style, false);
11574 else
11576 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
11577 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
11578 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
11579 ix86_add_queued_cfa_restore_notes (insn);
11581 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
11582 if (m->fs.cfa_offset != UNITS_PER_WORD)
11584 m->fs.cfa_offset = UNITS_PER_WORD;
11585 add_reg_note (insn, REG_CFA_DEF_CFA,
11586 plus_constant (Pmode, stack_pointer_rtx,
11587 UNITS_PER_WORD));
11588 RTX_FRAME_RELATED_P (insn) = 1;
11591 m->fs.sp_offset = UNITS_PER_WORD;
11592 m->fs.sp_valid = true;
11595 else
11597 /* SEH requires that the function end with (1) a stack adjustment
11598 if necessary, (2) a sequence of pops, and (3) a return or
11599 jump instruction. Prevent insns from the function body from
11600 being scheduled into this sequence. */
11601 if (TARGET_SEH)
11603 /* Prevent a catch region from being adjacent to the standard
11604 epilogue sequence. Unfortunately, neither crtl->uses_eh_lsda nor
11605 several other flags that would be interesting to test are
11606 set up yet. */
11607 if (flag_non_call_exceptions)
11608 emit_insn (gen_nops (const1_rtx));
11609 else
11610 emit_insn (gen_blockage ());
11613 /* First step is to deallocate the stack frame so that we can
11614 pop the registers. Also do it on SEH target for very large
11615 frame as the emitted instructions aren't allowed by the ABI in
11616 epilogues. */
11617 if (!m->fs.sp_valid
11618 || (TARGET_SEH
11619 && (m->fs.sp_offset - frame.reg_save_offset
11620 >= SEH_MAX_FRAME_SIZE)))
11622 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
11623 GEN_INT (m->fs.fp_offset
11624 - frame.reg_save_offset),
11625 style, false);
11627 else if (m->fs.sp_offset != frame.reg_save_offset)
11629 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11630 GEN_INT (m->fs.sp_offset
11631 - frame.reg_save_offset),
11632 style,
11633 m->fs.cfa_reg == stack_pointer_rtx);
11636 ix86_emit_restore_regs_using_pop ();
11639 /* If we used a frame pointer and haven't already got rid of it,
11640 then do so now. */
11641 if (m->fs.fp_valid)
11643 /* If the stack pointer is valid and pointing at the frame
11644 pointer store address, then we only need a pop. */
11645 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
11646 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11647 /* Leave results in shorter dependency chains on CPUs that are
11648 able to grok it fast. */
11649 else if (TARGET_USE_LEAVE
11650 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
11651 || !cfun->machine->use_fast_prologue_epilogue)
11652 ix86_emit_leave ();
11653 else
11655 pro_epilogue_adjust_stack (stack_pointer_rtx,
11656 hard_frame_pointer_rtx,
11657 const0_rtx, style, !using_drap);
11658 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11662 if (using_drap)
11664 int param_ptr_offset = UNITS_PER_WORD;
11665 rtx insn;
11667 gcc_assert (stack_realign_drap);
11669 if (ix86_static_chain_on_stack)
11670 param_ptr_offset += UNITS_PER_WORD;
11671 if (!call_used_regs[REGNO (crtl->drap_reg)])
11672 param_ptr_offset += UNITS_PER_WORD;
11674 insn = emit_insn (gen_rtx_SET
11675 (VOIDmode, stack_pointer_rtx,
11676 gen_rtx_PLUS (Pmode,
11677 crtl->drap_reg,
11678 GEN_INT (-param_ptr_offset))));
11679 m->fs.cfa_reg = stack_pointer_rtx;
11680 m->fs.cfa_offset = param_ptr_offset;
11681 m->fs.sp_offset = param_ptr_offset;
11682 m->fs.realigned = false;
11684 add_reg_note (insn, REG_CFA_DEF_CFA,
11685 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11686 GEN_INT (param_ptr_offset)));
11687 RTX_FRAME_RELATED_P (insn) = 1;
11689 if (!call_used_regs[REGNO (crtl->drap_reg)])
11690 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
11693 /* At this point the stack pointer must be valid, and we must have
11694 restored all of the registers. We may not have deallocated the
11695 entire stack frame. We've delayed this until now because it may
11696 be possible to merge the local stack deallocation with the
11697 deallocation forced by ix86_static_chain_on_stack. */
11698 gcc_assert (m->fs.sp_valid);
11699 gcc_assert (!m->fs.fp_valid);
11700 gcc_assert (!m->fs.realigned);
11701 if (m->fs.sp_offset != UNITS_PER_WORD)
11703 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11704 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
11705 style, true);
11707 else
11708 ix86_add_queued_cfa_restore_notes (get_last_insn ());
11710 /* Sibcall epilogues don't want a return instruction. */
11711 if (style == 0)
11713 m->fs = frame_state_save;
11714 return;
11717 if (crtl->args.pops_args && crtl->args.size)
11719 rtx popc = GEN_INT (crtl->args.pops_args);
11721 /* i386 can only pop 64K bytes. If asked to pop more, pop return
11722 address, do explicit add, and jump indirectly to the caller. */
11724 if (crtl->args.pops_args >= 65536)
11726 rtx ecx = gen_rtx_REG (SImode, CX_REG);
11727 rtx insn;
11729 /* There is no "pascal" calling convention in any 64bit ABI. */
11730 gcc_assert (!TARGET_64BIT);
11732 insn = emit_insn (gen_pop (ecx));
11733 m->fs.cfa_offset -= UNITS_PER_WORD;
11734 m->fs.sp_offset -= UNITS_PER_WORD;
11736 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11737 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
11738 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11739 add_reg_note (insn, REG_CFA_REGISTER,
11740 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
11741 RTX_FRAME_RELATED_P (insn) = 1;
11743 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11744 popc, -1, true);
11745 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
11747 else
11748 emit_jump_insn (gen_simple_return_pop_internal (popc));
11750 else
11751 emit_jump_insn (gen_simple_return_internal ());
11753 /* Restore the state back to the state from the prologue,
11754 so that it's correct for the next epilogue. */
11755 m->fs = frame_state_save;
11758 /* Reset from the function's potential modifications. */
11760 static void
11761 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
11762 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
11764 if (pic_offset_table_rtx)
11765 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
11766 #if TARGET_MACHO
11767 /* Mach-O doesn't support labels at the end of objects, so if
11768 it looks like we might want one, insert a NOP. */
11770 rtx insn = get_last_insn ();
11771 rtx deleted_debug_label = NULL_RTX;
11772 while (insn
11773 && NOTE_P (insn)
11774 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
11776 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
11777 notes; only set their CODE_LABEL_NUMBER to -1 instead,
11778 otherwise there would be code generation differences
11779 between -g and -g0. */
11780 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
11781 deleted_debug_label = insn;
11782 insn = PREV_INSN (insn);
11784 if (insn
11785 && (LABEL_P (insn)
11786 || (NOTE_P (insn)
11787 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
11788 fputs ("\tnop\n", file);
11789 else if (deleted_debug_label)
11790 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
11791 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
11792 CODE_LABEL_NUMBER (insn) = -1;
11794 #endif
11798 /* Return a scratch register to use in the split stack prologue. The
11799 split stack prologue is used for -fsplit-stack. It is the first
11800 instructions in the function, even before the regular prologue.
11801 The scratch register can be any caller-saved register which is not
11802 used for parameters or for the static chain. */
11804 static unsigned int
11805 split_stack_prologue_scratch_regno (void)
11807 if (TARGET_64BIT)
11808 return R11_REG;
11809 else
11811 bool is_fastcall, is_thiscall;
11812 int regparm;
11814 is_fastcall = (lookup_attribute ("fastcall",
11815 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11816 != NULL);
11817 is_thiscall = (lookup_attribute ("thiscall",
11818 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11819 != NULL);
11820 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
11822 if (is_fastcall)
11824 if (DECL_STATIC_CHAIN (cfun->decl))
11826 sorry ("-fsplit-stack does not support fastcall with "
11827 "nested function");
11828 return INVALID_REGNUM;
11830 return AX_REG;
11832 else if (is_thiscall)
11834 if (!DECL_STATIC_CHAIN (cfun->decl))
11835 return DX_REG;
11836 return AX_REG;
11838 else if (regparm < 3)
11840 if (!DECL_STATIC_CHAIN (cfun->decl))
11841 return CX_REG;
11842 else
11844 if (regparm >= 2)
11846 sorry ("-fsplit-stack does not support 2 register "
11847 "parameters for a nested function");
11848 return INVALID_REGNUM;
11850 return DX_REG;
11853 else
11855 /* FIXME: We could make this work by pushing a register
11856 around the addition and comparison. */
11857 sorry ("-fsplit-stack does not support 3 register parameters");
11858 return INVALID_REGNUM;
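/* Illustrative summary of the choice made above:

	64-bit					-> %r11
	32-bit, fastcall			-> %eax (nested functions rejected)
	32-bit, thiscall			-> %edx, or %eax with a static chain
	32-bit, regparm < 3, no static chain	-> %ecx
	32-bit, regparm 0 or 1, static chain	-> %edx
	32-bit, regparm 2, static chain		-> rejected
	32-bit, regparm 3			-> rejected  */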
11863 /* A SYMBOL_REF for the function which allocates new stack space for
11864 -fsplit-stack. */
11866 static GTY(()) rtx split_stack_fn;
11868 /* A SYMBOL_REF for the more stack function when using the large
11869 model. */
11871 static GTY(()) rtx split_stack_fn_large;
11873 /* Handle -fsplit-stack. These are the first instructions in the
11874 function, even before the regular prologue. */
11876 void
11877 ix86_expand_split_stack_prologue (void)
11879 struct ix86_frame frame;
11880 HOST_WIDE_INT allocate;
11881 unsigned HOST_WIDE_INT args_size;
11882 rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
11883 rtx scratch_reg = NULL_RTX;
11884 rtx varargs_label = NULL_RTX;
11885 rtx fn;
11887 gcc_assert (flag_split_stack && reload_completed);
11889 ix86_finalize_stack_realign_flags ();
11890 ix86_compute_frame_layout (&frame);
11891 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
11893 /* This is the label we will branch to if we have enough stack
11894 space. We expect the basic block reordering pass to reverse this
11895 branch if optimizing, so that we branch in the unlikely case. */
11896 label = gen_label_rtx ();
11898 /* We need to compare the stack pointer minus the frame size with
11899 the stack boundary in the TCB. The stack boundary always gives
11900 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
11901 can compare directly. Otherwise we need to do an addition. */
11903 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
11904 UNSPEC_STACK_CHECK);
11905 limit = gen_rtx_CONST (Pmode, limit);
11906 limit = gen_rtx_MEM (Pmode, limit);
11907 if (allocate < SPLIT_STACK_AVAILABLE)
11908 current = stack_pointer_rtx;
11909 else
11911 unsigned int scratch_regno;
11912 rtx offset;
11914 /* We need a scratch register to hold the stack pointer minus
11915 the required frame size. Since this is the very start of the
11916 function, the scratch register can be any caller-saved
11917 register which is not used for parameters. */
11918 offset = GEN_INT (- allocate);
11919 scratch_regno = split_stack_prologue_scratch_regno ();
11920 if (scratch_regno == INVALID_REGNUM)
11921 return;
11922 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11923 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
11925 /* We don't use ix86_gen_add3 in this case because it will
11926 want to split to lea, but when not optimizing the insn
11927 will not be split after this point. */
11928 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11929 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11930 offset)));
11932 else
11934 emit_move_insn (scratch_reg, offset);
11935 emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
11936 stack_pointer_rtx));
11938 current = scratch_reg;
11941 ix86_expand_branch (GEU, current, limit, label);
11942 jump_insn = get_last_insn ();
11943 JUMP_LABEL (jump_insn) = label;
11945 /* Mark the jump as very likely to be taken. */
11946 add_int_reg_note (jump_insn, REG_BR_PROB,
11947 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
11949 if (split_stack_fn == NULL_RTX)
11950 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
11951 fn = split_stack_fn;
11953 /* Get more stack space. We pass in the desired stack space and the
11954 size of the arguments to copy to the new stack. In 32-bit mode
11955 we push the parameters; __morestack will return on a new stack
11956 anyhow. In 64-bit mode we pass the parameters in r10 and
11957 r11. */
11958 allocate_rtx = GEN_INT (allocate);
11959 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
11960 call_fusage = NULL_RTX;
11961 if (TARGET_64BIT)
11963 rtx reg10, reg11;
11965 reg10 = gen_rtx_REG (Pmode, R10_REG);
11966 reg11 = gen_rtx_REG (Pmode, R11_REG);
11968 /* If this function uses a static chain, it will be in %r10.
11969 Preserve it across the call to __morestack. */
11970 if (DECL_STATIC_CHAIN (cfun->decl))
11972 rtx rax;
11974 rax = gen_rtx_REG (word_mode, AX_REG);
11975 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
11976 use_reg (&call_fusage, rax);
11979 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
11980 && !TARGET_PECOFF)
11982 HOST_WIDE_INT argval;
11984 gcc_assert (Pmode == DImode);
11985 /* When using the large model we need to load the address
11986 into a register, and we've run out of registers. So we
11987 switch to a different calling convention, and we call a
11988 different function: __morestack_large_model. We pass the
11989 argument size in the upper 32 bits of r10 and pass the
11990 frame size in the lower 32 bits. */
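	     As an illustrative example: with args_size = 24 and
	     allocate = 416 the packed value computed below is
	     argval = (24 << 32) + 416 = 0x18000001a0, i.e. %r10 carries
	     0x00000018 in its upper half and 0x000001a0 in its lower
	     half.  */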
11991 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
11992 gcc_assert ((args_size & 0xffffffff) == args_size);
11994 if (split_stack_fn_large == NULL_RTX)
11995 split_stack_fn_large =
11996 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
11998 if (ix86_cmodel == CM_LARGE_PIC)
12000 rtx label, x;
12002 label = gen_label_rtx ();
12003 emit_label (label);
12004 LABEL_PRESERVE_P (label) = 1;
12005 emit_insn (gen_set_rip_rex64 (reg10, label));
12006 emit_insn (gen_set_got_offset_rex64 (reg11, label));
12007 emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
12008 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
12009 UNSPEC_GOT);
12010 x = gen_rtx_CONST (Pmode, x);
12011 emit_move_insn (reg11, x);
12012 x = gen_rtx_PLUS (Pmode, reg10, reg11);
12013 x = gen_const_mem (Pmode, x);
12014 emit_move_insn (reg11, x);
12016 else
12017 emit_move_insn (reg11, split_stack_fn_large);
12019 fn = reg11;
12021 argval = ((args_size << 16) << 16) + allocate;
12022 emit_move_insn (reg10, GEN_INT (argval));
12024 else
12026 emit_move_insn (reg10, allocate_rtx);
12027 emit_move_insn (reg11, GEN_INT (args_size));
12028 use_reg (&call_fusage, reg11);
12031 use_reg (&call_fusage, reg10);
12033 else
12035 emit_insn (gen_push (GEN_INT (args_size)));
12036 emit_insn (gen_push (allocate_rtx));
12038 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
12039 GEN_INT (UNITS_PER_WORD), constm1_rtx,
12040 NULL_RTX, false);
12041 add_function_usage_to (call_insn, call_fusage);
12043 /* In order to make call/return prediction work right, we now need
12044 to execute a return instruction. See
12045 libgcc/config/i386/morestack.S for the details on how this works.
12047 For flow purposes gcc must not see this as a return
12048 instruction--we need control flow to continue at the subsequent
12049 label. Therefore, we use an unspec. */
12050 gcc_assert (crtl->args.pops_args < 65536);
12051 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
12053 /* If we are in 64-bit mode and this function uses a static chain,
12054 we saved %r10 in %rax before calling _morestack. */
12055 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
12056 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
12057 gen_rtx_REG (word_mode, AX_REG));
12059 /* If this function calls va_start, we need to store a pointer to
12060 the arguments on the old stack, because they may not have been
12061 all copied to the new stack. At this point the old stack can be
12062 found at the frame pointer value used by __morestack, because
12063 __morestack has set that up before calling back to us. Here we
12064 store that pointer in a scratch register, and in
12065 ix86_expand_prologue we store the scratch register in a stack
12066 slot. */
12067 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12069 unsigned int scratch_regno;
12070 rtx frame_reg;
12071 int words;
12073 scratch_regno = split_stack_prologue_scratch_regno ();
12074 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12075 frame_reg = gen_rtx_REG (Pmode, BP_REG);
12077 /* 64-bit:
12078 fp -> old fp value
12079 return address within this function
12080 return address of caller of this function
12081 stack arguments
12082 So we add three words to get to the stack arguments.
12084 32-bit:
12085 fp -> old fp value
12086 return address within this function
12087 first argument to __morestack
12088 second argument to __morestack
12089 return address of caller of this function
12090 stack arguments
12091 So we add five words to get to the stack arguments. */
12093 words = TARGET_64BIT ? 3 : 5;
12094 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12095 gen_rtx_PLUS (Pmode, frame_reg,
12096 GEN_INT (words * UNITS_PER_WORD))));
12098 varargs_label = gen_label_rtx ();
12099 emit_jump_insn (gen_jump (varargs_label));
12100 JUMP_LABEL (get_last_insn ()) = varargs_label;
12102 emit_barrier ();
12105 emit_label (label);
12106 LABEL_NUSES (label) = 1;
12108 /* If this function calls va_start, we now have to set the scratch
12109 register for the case where we do not call __morestack. In this
12110 case we need to set it based on the stack pointer. */
12111 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12113 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12114 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12115 GEN_INT (UNITS_PER_WORD))));
12117 emit_label (varargs_label);
12118 LABEL_NUSES (varargs_label) = 1;
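/* Illustrative sketch of the 64-bit code the above typically emits when
   no scratch register is needed; the TCB slot read through the
   UNSPEC_STACK_CHECK address is shown symbolically:

	cmpq	%fs:<stack guard slot>, %rsp
	jae	.Lenough
	movq	$ALLOCATE, %r10
	movq	$ARGS_SIZE, %r11
	callq	__morestack
	retq
	.Lenough:
*/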
12122 /* We may have to tell the dataflow pass that the split stack prologue
12123 is initializing a scratch register. */
12125 static void
12126 ix86_live_on_entry (bitmap regs)
12128 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12130 gcc_assert (flag_split_stack);
12131 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
12135 /* Extract the parts of an RTL expression that is a valid memory address
12136 for an instruction. Return 0 if the structure of the address is
12137 grossly off. Return -1 if the address contains ASHIFT, so it is not
12138 strictly valid, but still used for computing length of lea instruction. */
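/* Illustrative example: the address
   (plus:DI (plus:DI (mult:DI (reg:DI %rax) (const_int 4))
		     (reg:DI %rbx))
	    (const_int 12))
   decomposes into base = %rbx, index = %rax, scale = 4, disp = 12 and
   seg = SEG_DEFAULT, i.e. the operand written as 12(%rbx,%rax,4).  */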
12141 ix86_decompose_address (rtx addr, struct ix86_address *out)
12143 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
12144 rtx base_reg, index_reg;
12145 HOST_WIDE_INT scale = 1;
12146 rtx scale_rtx = NULL_RTX;
12147 rtx tmp;
12148 int retval = 1;
12149 enum ix86_address_seg seg = SEG_DEFAULT;
12151 /* Allow zero-extended SImode addresses,
12152 they will be emitted with addr32 prefix. */
12153 if (TARGET_64BIT && GET_MODE (addr) == DImode)
12155 if (GET_CODE (addr) == ZERO_EXTEND
12156 && GET_MODE (XEXP (addr, 0)) == SImode)
12158 addr = XEXP (addr, 0);
12159 if (CONST_INT_P (addr))
12160 return 0;
12162 else if (GET_CODE (addr) == AND
12163 && const_32bit_mask (XEXP (addr, 1), DImode))
12165 addr = simplify_gen_subreg (SImode, XEXP (addr, 0), DImode, 0);
12166 if (addr == NULL_RTX)
12167 return 0;
12169 if (CONST_INT_P (addr))
12170 return 0;
12174 /* Allow SImode subregs of DImode addresses,
12175 they will be emitted with addr32 prefix. */
12176 if (TARGET_64BIT && GET_MODE (addr) == SImode)
12178 if (GET_CODE (addr) == SUBREG
12179 && GET_MODE (SUBREG_REG (addr)) == DImode)
12181 addr = SUBREG_REG (addr);
12182 if (CONST_INT_P (addr))
12183 return 0;
12187 if (REG_P (addr))
12188 base = addr;
12189 else if (GET_CODE (addr) == SUBREG)
12191 if (REG_P (SUBREG_REG (addr)))
12192 base = addr;
12193 else
12194 return 0;
12196 else if (GET_CODE (addr) == PLUS)
12198 rtx addends[4], op;
12199 int n = 0, i;
12201 op = addr;
12204 if (n >= 4)
12205 return 0;
12206 addends[n++] = XEXP (op, 1);
12207 op = XEXP (op, 0);
12209 while (GET_CODE (op) == PLUS);
12210 if (n >= 4)
12211 return 0;
12212 addends[n] = op;
12214 for (i = n; i >= 0; --i)
12216 op = addends[i];
12217 switch (GET_CODE (op))
12219 case MULT:
12220 if (index)
12221 return 0;
12222 index = XEXP (op, 0);
12223 scale_rtx = XEXP (op, 1);
12224 break;
12226 case ASHIFT:
12227 if (index)
12228 return 0;
12229 index = XEXP (op, 0);
12230 tmp = XEXP (op, 1);
12231 if (!CONST_INT_P (tmp))
12232 return 0;
12233 scale = INTVAL (tmp);
12234 if ((unsigned HOST_WIDE_INT) scale > 3)
12235 return 0;
12236 scale = 1 << scale;
12237 break;
12239 case ZERO_EXTEND:
12240 op = XEXP (op, 0);
12241 if (GET_CODE (op) != UNSPEC)
12242 return 0;
12243 /* FALLTHRU */
12245 case UNSPEC:
12246 if (XINT (op, 1) == UNSPEC_TP
12247 && TARGET_TLS_DIRECT_SEG_REFS
12248 && seg == SEG_DEFAULT)
12249 seg = DEFAULT_TLS_SEG_REG;
12250 else
12251 return 0;
12252 break;
12254 case SUBREG:
12255 if (!REG_P (SUBREG_REG (op)))
12256 return 0;
12257 /* FALLTHRU */
12259 case REG:
12260 if (!base)
12261 base = op;
12262 else if (!index)
12263 index = op;
12264 else
12265 return 0;
12266 break;
12268 case CONST:
12269 case CONST_INT:
12270 case SYMBOL_REF:
12271 case LABEL_REF:
12272 if (disp)
12273 return 0;
12274 disp = op;
12275 break;
12277 default:
12278 return 0;
12282 else if (GET_CODE (addr) == MULT)
12284 index = XEXP (addr, 0); /* index*scale */
12285 scale_rtx = XEXP (addr, 1);
12287 else if (GET_CODE (addr) == ASHIFT)
12289 /* We're called for lea too, which implements ashift on occasion. */
12290 index = XEXP (addr, 0);
12291 tmp = XEXP (addr, 1);
12292 if (!CONST_INT_P (tmp))
12293 return 0;
12294 scale = INTVAL (tmp);
12295 if ((unsigned HOST_WIDE_INT) scale > 3)
12296 return 0;
12297 scale = 1 << scale;
12298 retval = -1;
12300 else
12301 disp = addr; /* displacement */
12303 if (index)
12305 if (REG_P (index))
12307 else if (GET_CODE (index) == SUBREG
12308 && REG_P (SUBREG_REG (index)))
12310 else
12311 return 0;
12314 /* Extract the integral value of scale. */
12315 if (scale_rtx)
12317 if (!CONST_INT_P (scale_rtx))
12318 return 0;
12319 scale = INTVAL (scale_rtx);
12322 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
12323 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
12325 /* Avoid useless 0 displacement. */
12326 if (disp == const0_rtx && (base || index))
12327 disp = NULL_RTX;
12329 /* Allow arg pointer and stack pointer as index if there is no scaling. */
12330 if (base_reg && index_reg && scale == 1
12331 && (index_reg == arg_pointer_rtx
12332 || index_reg == frame_pointer_rtx
12333 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
12335 rtx tmp;
12336 tmp = base, base = index, index = tmp;
12337 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
12340 /* Special case: %ebp cannot be encoded as a base without a displacement.
12341 Similarly %r13. */
12342 if (!disp
12343 && base_reg
12344 && (base_reg == hard_frame_pointer_rtx
12345 || base_reg == frame_pointer_rtx
12346 || base_reg == arg_pointer_rtx
12347 || (REG_P (base_reg)
12348 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
12349 || REGNO (base_reg) == R13_REG))))
12350 disp = const0_rtx;
12352 /* Special case: on K6, [%esi] forces the instruction to be vector decoded.
12353 Avoid this by transforming to [%esi+0].
12354 Reload calls address legitimization without cfun defined, so we need
12355 to test cfun for being non-NULL. */
12356 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
12357 && base_reg && !index_reg && !disp
12358 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
12359 disp = const0_rtx;
12361 /* Special case: encode reg+reg instead of reg*2. */
12362 if (!base && index && scale == 2)
12363 base = index, base_reg = index_reg, scale = 1;
12365 /* Special case: scaling cannot be encoded without base or displacement. */
12366 if (!base && !disp && index && scale != 1)
12367 disp = const0_rtx;
12369 out->base = base;
12370 out->index = index;
12371 out->disp = disp;
12372 out->scale = scale;
12373 out->seg = seg;
12375 return retval;
12378 /* Return cost of the memory address x.
12379 For i386, it is better to use a complex address than let gcc copy
12380 the address into a reg and make a new pseudo. But not if the address
12381 requires two regs - that would mean more pseudos with longer
12382 lifetimes. */
12383 static int
12384 ix86_address_cost (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED,
12385 addr_space_t as ATTRIBUTE_UNUSED,
12386 bool speed ATTRIBUTE_UNUSED)
12388 struct ix86_address parts;
12389 int cost = 1;
12390 int ok = ix86_decompose_address (x, &parts);
12392 gcc_assert (ok);
12394 if (parts.base && GET_CODE (parts.base) == SUBREG)
12395 parts.base = SUBREG_REG (parts.base);
12396 if (parts.index && GET_CODE (parts.index) == SUBREG)
12397 parts.index = SUBREG_REG (parts.index);
12399 /* Attempt to minimize number of registers in the address. */
12400 if ((parts.base
12401 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
12402 || (parts.index
12403 && (!REG_P (parts.index)
12404 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
12405 cost++;
12407 if (parts.base
12408 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
12409 && parts.index
12410 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
12411 && parts.base != parts.index)
12412 cost++;
12414 /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
12415 since its predecode logic can't detect the length of instructions
12416 and decoding degenerates to the vector decoder. Increase the cost of such
12417 addresses here. The penalty is at least 2 cycles. It may be worthwhile
12418 to split such addresses or even to refuse them entirely.
12420 The following addressing modes are affected:
12421 [base+scale*index]
12422 [scale*index+disp]
12423 [base+index]
12425 The first and last cases may be avoidable by explicitly coding the zero in
12426 the memory address, but I don't have an AMD-K6 machine handy to check this
12427 theory. */
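/* Editorial examples (not part of the original source) of the affected
   operands in AT&T syntax: (%ebx,%esi,2), 4(,%esi,2) and (%ebx,%esi), all
   of which are encoded with ModR/M 00_xxx_100 (mod=00 plus a SIB byte);
   4(%ebx,%esi,2) and (%ebx) are not penalized.  */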
12429 if (TARGET_K6
12430 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
12431 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
12432 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
12433 cost += 10;
12435 return cost;
12438 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
12439 this is used to form addresses to local data when -fPIC is in
12440 use. */
12442 static bool
12443 darwin_local_data_pic (rtx disp)
12445 return (GET_CODE (disp) == UNSPEC
12446 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
12449 /* Determine if a given RTX is a valid constant. We already know this
12450 satisfies CONSTANT_P. */
12452 static bool
12453 ix86_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
12455 switch (GET_CODE (x))
12457 case CONST:
12458 x = XEXP (x, 0);
12460 if (GET_CODE (x) == PLUS)
12462 if (!CONST_INT_P (XEXP (x, 1)))
12463 return false;
12464 x = XEXP (x, 0);
12467 if (TARGET_MACHO && darwin_local_data_pic (x))
12468 return true;
12470 /* Only some unspecs are valid as "constants". */
12471 if (GET_CODE (x) == UNSPEC)
12472 switch (XINT (x, 1))
12474 case UNSPEC_GOT:
12475 case UNSPEC_GOTOFF:
12476 case UNSPEC_PLTOFF:
12477 return TARGET_64BIT;
12478 case UNSPEC_TPOFF:
12479 case UNSPEC_NTPOFF:
12480 x = XVECEXP (x, 0, 0);
12481 return (GET_CODE (x) == SYMBOL_REF
12482 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
12483 case UNSPEC_DTPOFF:
12484 x = XVECEXP (x, 0, 0);
12485 return (GET_CODE (x) == SYMBOL_REF
12486 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
12487 default:
12488 return false;
12491 /* We must have drilled down to a symbol. */
12492 if (GET_CODE (x) == LABEL_REF)
12493 return true;
12494 if (GET_CODE (x) != SYMBOL_REF)
12495 return false;
12496 /* FALLTHRU */
12498 case SYMBOL_REF:
12499 /* TLS symbols are never valid. */
12500 if (SYMBOL_REF_TLS_MODEL (x))
12501 return false;
12503 /* DLLIMPORT symbols are never valid. */
12504 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12505 && SYMBOL_REF_DLLIMPORT_P (x))
12506 return false;
12508 #if TARGET_MACHO
12509 /* mdynamic-no-pic */
12510 if (MACHO_DYNAMIC_NO_PIC_P)
12511 return machopic_symbol_defined_p (x);
12512 #endif
12513 break;
12515 case CONST_DOUBLE:
12516 if (GET_MODE (x) == TImode
12517 && x != CONST0_RTX (TImode)
12518 && !TARGET_64BIT)
12519 return false;
12520 break;
12522 case CONST_VECTOR:
12523 if (!standard_sse_constant_p (x))
12524 return false;
12526 default:
12527 break;
12530 /* Otherwise we handle everything else in the move patterns. */
12531 return true;
12534 /* Determine if it's legal to put X into the constant pool. This
12535 is not possible for the address of thread-local symbols, which
12536 is checked above. */
12538 static bool
12539 ix86_cannot_force_const_mem (enum machine_mode mode, rtx x)
12541 /* We can always put integral constants and vectors in memory. */
12542 switch (GET_CODE (x))
12544 case CONST_INT:
12545 case CONST_DOUBLE:
12546 case CONST_VECTOR:
12547 return false;
12549 default:
12550 break;
12552 return !ix86_legitimate_constant_p (mode, x);
12555 /* Nonzero if the symbol is marked as dllimport or as a stub-variable,
12556 otherwise zero. */
12558 static bool
12559 is_imported_p (rtx x)
12561 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
12562 || GET_CODE (x) != SYMBOL_REF)
12563 return false;
12565 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
12569 /* Nonzero if the constant value X is a legitimate general operand
12570 when generating PIC code. It is given that flag_pic is on and
12571 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
12573 bool
12574 legitimate_pic_operand_p (rtx x)
12576 rtx inner;
12578 switch (GET_CODE (x))
12580 case CONST:
12581 inner = XEXP (x, 0);
12582 if (GET_CODE (inner) == PLUS
12583 && CONST_INT_P (XEXP (inner, 1)))
12584 inner = XEXP (inner, 0);
12586 /* Only some unspecs are valid as "constants". */
12587 if (GET_CODE (inner) == UNSPEC)
12588 switch (XINT (inner, 1))
12590 case UNSPEC_GOT:
12591 case UNSPEC_GOTOFF:
12592 case UNSPEC_PLTOFF:
12593 return TARGET_64BIT;
12594 case UNSPEC_TPOFF:
12595 x = XVECEXP (inner, 0, 0);
12596 return (GET_CODE (x) == SYMBOL_REF
12597 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
12598 case UNSPEC_MACHOPIC_OFFSET:
12599 return legitimate_pic_address_disp_p (x);
12600 default:
12601 return false;
12603 /* FALLTHRU */
12605 case SYMBOL_REF:
12606 case LABEL_REF:
12607 return legitimate_pic_address_disp_p (x);
12609 default:
12610 return true;
12614 /* Determine if a given CONST RTX is a valid memory displacement
12615 in PIC mode. */
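/* Editorial example (not part of the original source): a typical legitimate
   PIC displacement on ia32 is

     (const (unspec [(symbol_ref "x")] UNSPEC_GOTOFF))

   possibly wrapped in a PLUS with a CONST_INT offset; it is eventually
   printed as x@GOTOFF.  */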
12617 bool
12618 legitimate_pic_address_disp_p (rtx disp)
12620 bool saw_plus;
12622 /* In 64bit mode we can allow direct addresses of symbols and labels
12623 when they are not dynamic symbols. */
12624 if (TARGET_64BIT)
12626 rtx op0 = disp, op1;
12628 switch (GET_CODE (disp))
12630 case LABEL_REF:
12631 return true;
12633 case CONST:
12634 if (GET_CODE (XEXP (disp, 0)) != PLUS)
12635 break;
12636 op0 = XEXP (XEXP (disp, 0), 0);
12637 op1 = XEXP (XEXP (disp, 0), 1);
12638 if (!CONST_INT_P (op1)
12639 || INTVAL (op1) >= 16*1024*1024
12640 || INTVAL (op1) < -16*1024*1024)
12641 break;
12642 if (GET_CODE (op0) == LABEL_REF)
12643 return true;
12644 if (GET_CODE (op0) == CONST
12645 && GET_CODE (XEXP (op0, 0)) == UNSPEC
12646 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
12647 return true;
12648 if (GET_CODE (op0) == UNSPEC
12649 && XINT (op0, 1) == UNSPEC_PCREL)
12650 return true;
12651 if (GET_CODE (op0) != SYMBOL_REF)
12652 break;
12653 /* FALLTHRU */
12655 case SYMBOL_REF:
12656 /* TLS references should always be enclosed in UNSPEC.
12657 A dllimported symbol always needs to be resolved. */
12658 if (SYMBOL_REF_TLS_MODEL (op0)
12659 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
12660 return false;
12662 if (TARGET_PECOFF)
12664 if (is_imported_p (op0))
12665 return true;
12667 if (SYMBOL_REF_FAR_ADDR_P (op0)
12668 || !SYMBOL_REF_LOCAL_P (op0))
12669 break;
12671 /* Function symbols need to be resolved only for
12672 the large model.
12673 For the small model we don't need to resolve anything
12674 here. */
12675 if ((ix86_cmodel != CM_LARGE_PIC
12676 && SYMBOL_REF_FUNCTION_P (op0))
12677 || ix86_cmodel == CM_SMALL_PIC)
12678 return true;
12679 /* Non-external symbols don't need to be resolved for
12680 the large and medium models. */
12681 if ((ix86_cmodel == CM_LARGE_PIC
12682 || ix86_cmodel == CM_MEDIUM_PIC)
12683 && !SYMBOL_REF_EXTERNAL_P (op0))
12684 return true;
12686 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
12687 && SYMBOL_REF_LOCAL_P (op0)
12688 && ix86_cmodel != CM_LARGE_PIC)
12689 return true;
12690 break;
12692 default:
12693 break;
12696 if (GET_CODE (disp) != CONST)
12697 return false;
12698 disp = XEXP (disp, 0);
12700 if (TARGET_64BIT)
12702 /* It is unsafe to allow PLUS expressions here; this limits the allowed
12703 distance of GOT tables. We should not need these anyway. */
12704 if (GET_CODE (disp) != UNSPEC
12705 || (XINT (disp, 1) != UNSPEC_GOTPCREL
12706 && XINT (disp, 1) != UNSPEC_GOTOFF
12707 && XINT (disp, 1) != UNSPEC_PCREL
12708 && XINT (disp, 1) != UNSPEC_PLTOFF))
12709 return false;
12711 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
12712 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
12713 return false;
12714 return true;
12717 saw_plus = false;
12718 if (GET_CODE (disp) == PLUS)
12720 if (!CONST_INT_P (XEXP (disp, 1)))
12721 return false;
12722 disp = XEXP (disp, 0);
12723 saw_plus = true;
12726 if (TARGET_MACHO && darwin_local_data_pic (disp))
12727 return true;
12729 if (GET_CODE (disp) != UNSPEC)
12730 return false;
12732 switch (XINT (disp, 1))
12734 case UNSPEC_GOT:
12735 if (saw_plus)
12736 return false;
12737 /* We need to check for both symbols and labels because VxWorks loads
12738 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
12739 details. */
12740 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12741 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
12742 case UNSPEC_GOTOFF:
12743 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
12744 While the ABI also specifies a 32bit relocation, we don't produce it in
12745 the small PIC model at all. */
12746 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12747 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
12748 && !TARGET_64BIT)
12749 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
12750 return false;
12751 case UNSPEC_GOTTPOFF:
12752 case UNSPEC_GOTNTPOFF:
12753 case UNSPEC_INDNTPOFF:
12754 if (saw_plus)
12755 return false;
12756 disp = XVECEXP (disp, 0, 0);
12757 return (GET_CODE (disp) == SYMBOL_REF
12758 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
12759 case UNSPEC_NTPOFF:
12760 disp = XVECEXP (disp, 0, 0);
12761 return (GET_CODE (disp) == SYMBOL_REF
12762 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
12763 case UNSPEC_DTPOFF:
12764 disp = XVECEXP (disp, 0, 0);
12765 return (GET_CODE (disp) == SYMBOL_REF
12766 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
12769 return false;
12772 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Return true if
12773 reloads were pushed for the invalid parts of address X, in which case
12774 the calling macro should goto WIN; return false if no replacement
12775 is called for. */
12777 bool
12778 ix86_legitimize_reload_address (rtx x,
12779 enum machine_mode mode ATTRIBUTE_UNUSED,
12780 int opnum, int type,
12781 int ind_levels ATTRIBUTE_UNUSED)
12783 /* Reload can generate:
12785 (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
12786 (reg:DI 97))
12787 (reg:DI 2 cx))
12789 This RTX is rejected by ix86_legitimate_address_p because
12790 base register 97 is not a strict base register. Following this
12791 rejection, reload pushes all three components into separate registers,
12792 creating an invalid memory address RTX.
12794 The following code reloads only the invalid part of the
12795 memory address RTX. */
12797 if (GET_CODE (x) == PLUS
12798 && REG_P (XEXP (x, 1))
12799 && GET_CODE (XEXP (x, 0)) == PLUS
12800 && REG_P (XEXP (XEXP (x, 0), 1)))
12802 rtx base, index;
12803 bool something_reloaded = false;
12805 base = XEXP (XEXP (x, 0), 1);
12806 if (!REG_OK_FOR_BASE_STRICT_P (base))
12808 push_reload (base, NULL_RTX, &XEXP (XEXP (x, 0), 1), NULL,
12809 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
12810 opnum, (enum reload_type) type);
12811 something_reloaded = true;
12814 index = XEXP (x, 1);
12815 if (!REG_OK_FOR_INDEX_STRICT_P (index))
12817 push_reload (index, NULL_RTX, &XEXP (x, 1), NULL,
12818 INDEX_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
12819 opnum, (enum reload_type) type);
12820 something_reloaded = true;
12823 gcc_assert (something_reloaded);
12824 return true;
12827 return false;
12830 /* Determine if OP is a suitable RTX for an address register.
12831 Return the naked register if a register or a register subreg is
12832 found, otherwise return NULL_RTX. */
12834 static rtx
12835 ix86_validate_address_register (rtx op)
12837 enum machine_mode mode = GET_MODE (op);
12839 /* Only SImode or DImode registers can form the address. */
12840 if (mode != SImode && mode != DImode)
12841 return NULL_RTX;
12843 if (REG_P (op))
12844 return op;
12845 else if (GET_CODE (op) == SUBREG)
12847 rtx reg = SUBREG_REG (op);
12849 if (!REG_P (reg))
12850 return NULL_RTX;
12852 mode = GET_MODE (reg);
12854 /* Don't allow SUBREGs that span more than a word. It can
12855 lead to spill failures when the register is one word out
12856 of a two word structure. */
12857 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
12858 return NULL_RTX;
12860 /* Allow only SUBREGs of non-eliminable hard registers. */
12861 if (register_no_elim_operand (reg, mode))
12862 return reg;
12865 /* Op is not a register. */
12866 return NULL_RTX;
12869 /* Recognizes RTL expressions that are valid memory addresses for an
12870 instruction. The MODE argument is the machine mode for the MEM
12871 expression that wants to use this address.
12873 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
12874 convert common non-canonical forms to canonical form so that they will
12875 be recognized. */
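/* Editorial example (not part of the original source): a bare
   (ashift:SI (reg:SI ax) (const_int 2)) address is not accepted here;
   ix86_legitimize_address canonicalizes it into
   (mult:SI (reg:SI ax) (const_int 4)), which this function then accepts
   as a scaled index with a zero displacement.  */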
12877 static bool
12878 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
12879 rtx addr, bool strict)
12881 struct ix86_address parts;
12882 rtx base, index, disp;
12883 HOST_WIDE_INT scale;
12884 enum ix86_address_seg seg;
12886 if (ix86_decompose_address (addr, &parts) <= 0)
12887 /* Decomposition failed. */
12888 return false;
12890 base = parts.base;
12891 index = parts.index;
12892 disp = parts.disp;
12893 scale = parts.scale;
12894 seg = parts.seg;
12896 /* Validate base register. */
12897 if (base)
12899 rtx reg = ix86_validate_address_register (base);
12901 if (reg == NULL_RTX)
12902 return false;
12904 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
12905 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
12906 /* Base is not valid. */
12907 return false;
12910 /* Validate index register. */
12911 if (index)
12913 rtx reg = ix86_validate_address_register (index);
12915 if (reg == NULL_RTX)
12916 return false;
12918 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
12919 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
12920 /* Index is not valid. */
12921 return false;
12924 /* Index and base should have the same mode. */
12925 if (base && index
12926 && GET_MODE (base) != GET_MODE (index))
12927 return false;
12929 /* Address override works only on the (%reg) part of %fs:(%reg). */
12930 if (seg != SEG_DEFAULT
12931 && ((base && GET_MODE (base) != word_mode)
12932 || (index && GET_MODE (index) != word_mode)))
12933 return false;
12935 /* Validate scale factor. */
12936 if (scale != 1)
12938 if (!index)
12939 /* Scale without index. */
12940 return false;
12942 if (scale != 2 && scale != 4 && scale != 8)
12943 /* Scale is not a valid multiplier. */
12944 return false;
12947 /* Validate displacement. */
12948 if (disp)
12950 if (GET_CODE (disp) == CONST
12951 && GET_CODE (XEXP (disp, 0)) == UNSPEC
12952 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
12953 switch (XINT (XEXP (disp, 0), 1))
12955 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
12956 used. While the ABI also specifies 32bit relocations, we don't produce
12957 them at all and use IP-relative addressing instead. */
12958 case UNSPEC_GOT:
12959 case UNSPEC_GOTOFF:
12960 gcc_assert (flag_pic);
12961 if (!TARGET_64BIT)
12962 goto is_legitimate_pic;
12964 /* 64bit address unspec. */
12965 return false;
12967 case UNSPEC_GOTPCREL:
12968 case UNSPEC_PCREL:
12969 gcc_assert (flag_pic);
12970 goto is_legitimate_pic;
12972 case UNSPEC_GOTTPOFF:
12973 case UNSPEC_GOTNTPOFF:
12974 case UNSPEC_INDNTPOFF:
12975 case UNSPEC_NTPOFF:
12976 case UNSPEC_DTPOFF:
12977 break;
12979 case UNSPEC_STACK_CHECK:
12980 gcc_assert (flag_split_stack);
12981 break;
12983 default:
12984 /* Invalid address unspec. */
12985 return false;
12988 else if (SYMBOLIC_CONST (disp)
12989 && (flag_pic
12990 || (TARGET_MACHO
12991 #if TARGET_MACHO
12992 && MACHOPIC_INDIRECT
12993 && !machopic_operand_p (disp)
12994 #endif
12998 is_legitimate_pic:
12999 if (TARGET_64BIT && (index || base))
13001 /* foo@dtpoff(%rX) is ok. */
13002 if (GET_CODE (disp) != CONST
13003 || GET_CODE (XEXP (disp, 0)) != PLUS
13004 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
13005 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
13006 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
13007 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
13008 /* Non-constant pic memory reference. */
13009 return false;
13011 else if ((!TARGET_MACHO || flag_pic)
13012 && ! legitimate_pic_address_disp_p (disp))
13013 /* Displacement is an invalid pic construct. */
13014 return false;
13015 #if TARGET_MACHO
13016 else if (MACHO_DYNAMIC_NO_PIC_P
13017 && !ix86_legitimate_constant_p (Pmode, disp))
13018 /* The displacement must be referenced via a non_lazy_pointer. */
13019 return false;
13020 #endif
13022 /* This code used to verify that a symbolic pic displacement
13023 includes the pic_offset_table_rtx register.
13025 While this is a good idea, unfortunately these constructs may
13026 be created by the "adds using lea" optimization for incorrect
13027 code like:
13029 int a;
13030 int foo(int i)
13032 return *(&a+i);
13035 This code is nonsensical, but results in addressing the
13036 GOT table with a pic_offset_table_rtx base. We can't
13037 just refuse it easily, since it gets matched by the
13038 "addsi3" pattern, which later gets split to lea when the
13039 output register differs from the input. While this
13040 could be handled by a separate addsi pattern for this case
13041 that never results in lea, disabling this test seems to be
13042 the easier and correct fix for the crash. */
13044 else if (GET_CODE (disp) != LABEL_REF
13045 && !CONST_INT_P (disp)
13046 && (GET_CODE (disp) != CONST
13047 || !ix86_legitimate_constant_p (Pmode, disp))
13048 && (GET_CODE (disp) != SYMBOL_REF
13049 || !ix86_legitimate_constant_p (Pmode, disp)))
13050 /* Displacement is not constant. */
13051 return false;
13052 else if (TARGET_64BIT
13053 && !x86_64_immediate_operand (disp, VOIDmode))
13054 /* Displacement is out of range. */
13055 return false;
13056 /* In x32 mode, constant addresses are sign-extended to 64bit, so
13057 we have to reject addresses in the range 0x80000000 to 0xffffffff. */
13058 else if (TARGET_X32 && !(index || base)
13059 && CONST_INT_P (disp)
13060 && val_signbit_known_set_p (SImode, INTVAL (disp)))
13061 return false;
13064 /* Everything looks valid. */
13065 return true;
13068 /* Determine if a given RTX is a valid constant address. */
13070 bool
13071 constant_address_p (rtx x)
13073 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
13076 /* Return a unique alias set for the GOT. */
13078 static alias_set_type
13079 ix86_GOT_alias_set (void)
13081 static alias_set_type set = -1;
13082 if (set == -1)
13083 set = new_alias_set ();
13084 return set;
13087 /* Return a legitimate reference for ORIG (an address) using the
13088 register REG. If REG is 0, a new pseudo is generated.
13090 There are two types of references that must be handled:
13092 1. Global data references must load the address from the GOT, via
13093 the PIC reg. An insn is emitted to do this load, and the reg is
13094 returned.
13096 2. Static data references, constant pool addresses, and code labels
13097 compute the address as an offset from the GOT, whose base is in
13098 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
13099 differentiate them from global data objects. The returned
13100 address is the PIC reg + an unspec constant.
13102 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
13103 reg also appears in the address. */
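/* Editorial sketch (not part of the original source), assuming ia32 -fpic:
   a local static variable x is addressed as pic_reg + x@GOTOFF, emitted
   as x@GOTOFF(%ebx), while a preemptible global y is loaded through the
   GOT, i.e. a memory reference y@GOT(%ebx) whose value is y's address.  */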
13105 static rtx
13106 legitimize_pic_address (rtx orig, rtx reg)
13108 rtx addr = orig;
13109 rtx new_rtx = orig;
13111 #if TARGET_MACHO
13112 if (TARGET_MACHO && !TARGET_64BIT)
13114 if (reg == 0)
13115 reg = gen_reg_rtx (Pmode);
13116 /* Use the generic Mach-O PIC machinery. */
13117 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
13119 #endif
13121 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13123 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13124 if (tmp)
13125 return tmp;
13128 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
13129 new_rtx = addr;
13130 else if (TARGET_64BIT && !TARGET_PECOFF
13131 && ix86_cmodel != CM_SMALL_PIC && gotoff_operand (addr, Pmode))
13133 rtx tmpreg;
13134 /* This symbol may be referenced via a displacement from the PIC
13135 base address (@GOTOFF). */
13137 if (reload_in_progress)
13138 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13139 if (GET_CODE (addr) == CONST)
13140 addr = XEXP (addr, 0);
13141 if (GET_CODE (addr) == PLUS)
13143 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13144 UNSPEC_GOTOFF);
13145 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13147 else
13148 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13149 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13150 if (!reg)
13151 tmpreg = gen_reg_rtx (Pmode);
13152 else
13153 tmpreg = reg;
13154 emit_move_insn (tmpreg, new_rtx);
13156 if (reg != 0)
13158 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
13159 tmpreg, 1, OPTAB_DIRECT);
13160 new_rtx = reg;
13162 else
13163 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
13165 else if (!TARGET_64BIT && !TARGET_PECOFF && gotoff_operand (addr, Pmode))
13167 /* This symbol may be referenced via a displacement from the PIC
13168 base address (@GOTOFF). */
13170 if (reload_in_progress)
13171 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13172 if (GET_CODE (addr) == CONST)
13173 addr = XEXP (addr, 0);
13174 if (GET_CODE (addr) == PLUS)
13176 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13177 UNSPEC_GOTOFF);
13178 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13180 else
13181 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13182 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13183 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13185 if (reg != 0)
13187 emit_move_insn (reg, new_rtx);
13188 new_rtx = reg;
13191 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
13192 /* We can't use @GOTOFF for text labels on VxWorks;
13193 see gotoff_operand. */
13194 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
13196 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13197 if (tmp)
13198 return tmp;
13200 /* For x64 PE-COFF there is no GOT table, so we use the address
13201 directly. */
13202 if (TARGET_64BIT && TARGET_PECOFF)
13204 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
13205 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13207 if (reg == 0)
13208 reg = gen_reg_rtx (Pmode);
13209 emit_move_insn (reg, new_rtx);
13210 new_rtx = reg;
13212 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
13214 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
13215 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13216 new_rtx = gen_const_mem (Pmode, new_rtx);
13217 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13219 if (reg == 0)
13220 reg = gen_reg_rtx (Pmode);
13221 /* Use gen_movsi directly; otherwise the address is loaded
13222 into a register for CSE. We don't want to CSE these addresses;
13223 instead we CSE addresses from the GOT table, so skip this. */
13224 emit_insn (gen_movsi (reg, new_rtx));
13225 new_rtx = reg;
13227 else
13229 /* This symbol must be referenced via a load from the
13230 Global Offset Table (@GOT). */
13232 if (reload_in_progress)
13233 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13234 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
13235 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13236 if (TARGET_64BIT)
13237 new_rtx = force_reg (Pmode, new_rtx);
13238 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13239 new_rtx = gen_const_mem (Pmode, new_rtx);
13240 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13242 if (reg == 0)
13243 reg = gen_reg_rtx (Pmode);
13244 emit_move_insn (reg, new_rtx);
13245 new_rtx = reg;
13248 else
13250 if (CONST_INT_P (addr)
13251 && !x86_64_immediate_operand (addr, VOIDmode))
13253 if (reg)
13255 emit_move_insn (reg, addr);
13256 new_rtx = reg;
13258 else
13259 new_rtx = force_reg (Pmode, addr);
13261 else if (GET_CODE (addr) == CONST)
13263 addr = XEXP (addr, 0);
13265 /* We must match stuff we generate before. Assume the only
13266 unspecs that can get here are ours. Not that we could do
13267 anything with them anyway.... */
13268 if (GET_CODE (addr) == UNSPEC
13269 || (GET_CODE (addr) == PLUS
13270 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
13271 return orig;
13272 gcc_assert (GET_CODE (addr) == PLUS);
13274 if (GET_CODE (addr) == PLUS)
13276 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
13278 /* Check first to see if this is a constant offset from a @GOTOFF
13279 symbol reference. */
13280 if (!TARGET_PECOFF && gotoff_operand (op0, Pmode)
13281 && CONST_INT_P (op1))
13283 if (!TARGET_64BIT)
13285 if (reload_in_progress)
13286 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13287 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
13288 UNSPEC_GOTOFF);
13289 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
13290 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13291 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13293 if (reg != 0)
13295 emit_move_insn (reg, new_rtx);
13296 new_rtx = reg;
13299 else
13301 if (INTVAL (op1) < -16*1024*1024
13302 || INTVAL (op1) >= 16*1024*1024)
13304 if (!x86_64_immediate_operand (op1, Pmode))
13305 op1 = force_reg (Pmode, op1);
13306 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
13310 else
13312 rtx base = legitimize_pic_address (op0, reg);
13313 enum machine_mode mode = GET_MODE (base);
13314 new_rtx
13315 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
13317 if (CONST_INT_P (new_rtx))
13319 if (INTVAL (new_rtx) < -16*1024*1024
13320 || INTVAL (new_rtx) >= 16*1024*1024)
13322 if (!x86_64_immediate_operand (new_rtx, mode))
13323 new_rtx = force_reg (mode, new_rtx);
13324 new_rtx
13325 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
13327 else
13328 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
13330 else
13332 if (GET_CODE (new_rtx) == PLUS
13333 && CONSTANT_P (XEXP (new_rtx, 1)))
13335 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
13336 new_rtx = XEXP (new_rtx, 1);
13338 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
13343 return new_rtx;
13346 /* Load the thread pointer. If TO_REG is true, force it into a register. */
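/* Editorial note (not part of the original source): on GNU/Linux targets
   the thread pointer is the %fs segment base in 64-bit mode and the %gs
   base in 32-bit mode; the UNSPEC_TP built below is rendered as a zero
   offset from that segment, e.g. %fs:0.  */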
13348 static rtx
13349 get_thread_pointer (enum machine_mode tp_mode, bool to_reg)
13351 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
13353 if (GET_MODE (tp) != tp_mode)
13355 gcc_assert (GET_MODE (tp) == SImode);
13356 gcc_assert (tp_mode == DImode);
13358 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
13361 if (to_reg)
13362 tp = copy_to_mode_reg (tp_mode, tp);
13364 return tp;
13367 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13369 static GTY(()) rtx ix86_tls_symbol;
13371 static rtx
13372 ix86_tls_get_addr (void)
13374 if (!ix86_tls_symbol)
13376 const char *sym
13377 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
13378 ? "___tls_get_addr" : "__tls_get_addr");
13380 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
13383 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
13385 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
13386 UNSPEC_PLTOFF);
13387 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
13388 gen_rtx_CONST (Pmode, unspec));
13391 return ix86_tls_symbol;
13394 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13396 static GTY(()) rtx ix86_tls_module_base_symbol;
13399 ix86_tls_module_base (void)
13401 if (!ix86_tls_module_base_symbol)
13403 ix86_tls_module_base_symbol
13404 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
13406 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13407 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13410 return ix86_tls_module_base_symbol;
13413 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
13414 false if we expect this to be used for a memory address and true if
13415 we expect to load the address into a register. */
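/* Editorial sketch (not part of the original source) of the access
   sequences this produces on 64-bit GNU targets for "__thread int x":

     local-exec:    movl %fs:x@tpoff, %eax
     initial-exec:  movq x@gottpoff(%rip), %rcx
                    movl %fs:(%rcx), %eax

   The dynamic models instead go through __tls_get_addr as emitted
   below.  */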
13417 static rtx
13418 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
13420 rtx dest, base, off;
13421 rtx pic = NULL_RTX, tp = NULL_RTX;
13422 enum machine_mode tp_mode = Pmode;
13423 int type;
13425 /* Fall back to the global dynamic model if the toolchain cannot support
13426 local dynamic. */
13427 if (TARGET_SUN_TLS && !TARGET_64BIT
13428 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
13429 && model == TLS_MODEL_LOCAL_DYNAMIC)
13430 model = TLS_MODEL_GLOBAL_DYNAMIC;
13432 switch (model)
13434 case TLS_MODEL_GLOBAL_DYNAMIC:
13435 dest = gen_reg_rtx (Pmode);
13437 if (!TARGET_64BIT)
13439 if (flag_pic && !TARGET_PECOFF)
13440 pic = pic_offset_table_rtx;
13441 else
13443 pic = gen_reg_rtx (Pmode);
13444 emit_insn (gen_set_got (pic));
13448 if (TARGET_GNU2_TLS)
13450 if (TARGET_64BIT)
13451 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
13452 else
13453 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
13455 tp = get_thread_pointer (Pmode, true);
13456 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
13458 if (GET_MODE (x) != Pmode)
13459 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13461 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
13463 else
13465 rtx caddr = ix86_tls_get_addr ();
13467 if (TARGET_64BIT)
13469 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13470 rtx insns;
13472 start_sequence ();
13473 emit_call_insn
13474 (ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
13475 insns = get_insns ();
13476 end_sequence ();
13478 if (GET_MODE (x) != Pmode)
13479 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13481 RTL_CONST_CALL_P (insns) = 1;
13482 emit_libcall_block (insns, dest, rax, x);
13484 else
13485 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
13487 break;
13489 case TLS_MODEL_LOCAL_DYNAMIC:
13490 base = gen_reg_rtx (Pmode);
13492 if (!TARGET_64BIT)
13494 if (flag_pic)
13495 pic = pic_offset_table_rtx;
13496 else
13498 pic = gen_reg_rtx (Pmode);
13499 emit_insn (gen_set_got (pic));
13503 if (TARGET_GNU2_TLS)
13505 rtx tmp = ix86_tls_module_base ();
13507 if (TARGET_64BIT)
13508 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
13509 else
13510 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
13512 tp = get_thread_pointer (Pmode, true);
13513 set_unique_reg_note (get_last_insn (), REG_EQUAL,
13514 gen_rtx_MINUS (Pmode, tmp, tp));
13516 else
13518 rtx caddr = ix86_tls_get_addr ();
13520 if (TARGET_64BIT)
13522 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13523 rtx insns, eqv;
13525 start_sequence ();
13526 emit_call_insn
13527 (ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
13528 insns = get_insns ();
13529 end_sequence ();
13531 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
13532 share the LD_BASE result with other LD model accesses. */
13533 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
13534 UNSPEC_TLS_LD_BASE);
13536 RTL_CONST_CALL_P (insns) = 1;
13537 emit_libcall_block (insns, base, rax, eqv);
13539 else
13540 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
13543 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
13544 off = gen_rtx_CONST (Pmode, off);
13546 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
13548 if (TARGET_GNU2_TLS)
13550 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
13552 if (GET_MODE (x) != Pmode)
13553 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13555 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
13557 break;
13559 case TLS_MODEL_INITIAL_EXEC:
13560 if (TARGET_64BIT)
13562 if (TARGET_SUN_TLS && !TARGET_X32)
13564 /* The Sun linker took the AMD64 TLS spec literally
13565 and can only handle %rax as the destination of the
13566 initial-exec code sequence. */
13568 dest = gen_reg_rtx (DImode);
13569 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
13570 return dest;
13573 /* Generate DImode references to avoid %fs:(%reg32)
13574 problems and the linker IE->LE relaxation bug. */
13575 tp_mode = DImode;
13576 pic = NULL;
13577 type = UNSPEC_GOTNTPOFF;
13579 else if (flag_pic)
13581 if (reload_in_progress)
13582 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13583 pic = pic_offset_table_rtx;
13584 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
13586 else if (!TARGET_ANY_GNU_TLS)
13588 pic = gen_reg_rtx (Pmode);
13589 emit_insn (gen_set_got (pic));
13590 type = UNSPEC_GOTTPOFF;
13592 else
13594 pic = NULL;
13595 type = UNSPEC_INDNTPOFF;
13598 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
13599 off = gen_rtx_CONST (tp_mode, off);
13600 if (pic)
13601 off = gen_rtx_PLUS (tp_mode, pic, off);
13602 off = gen_const_mem (tp_mode, off);
13603 set_mem_alias_set (off, ix86_GOT_alias_set ());
13605 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13607 base = get_thread_pointer (tp_mode,
13608 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
13609 off = force_reg (tp_mode, off);
13610 return gen_rtx_PLUS (tp_mode, base, off);
13612 else
13614 base = get_thread_pointer (Pmode, true);
13615 dest = gen_reg_rtx (Pmode);
13616 emit_insn (ix86_gen_sub3 (dest, base, off));
13618 break;
13620 case TLS_MODEL_LOCAL_EXEC:
13621 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
13622 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13623 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
13624 off = gen_rtx_CONST (Pmode, off);
13626 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13628 base = get_thread_pointer (Pmode,
13629 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
13630 return gen_rtx_PLUS (Pmode, base, off);
13632 else
13634 base = get_thread_pointer (Pmode, true);
13635 dest = gen_reg_rtx (Pmode);
13636 emit_insn (ix86_gen_sub3 (dest, base, off));
13638 break;
13640 default:
13641 gcc_unreachable ();
13644 return dest;
13647 /* Create or return the unique __imp_DECL dllimport symbol corresponding
13648 to symbol DECL if BEIMPORT is true. Otherwise create or return the
13649 unique refptr-DECL symbol corresponding to symbol DECL. */
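/* Editorial example (not part of the original source): for a dllimported
   symbol foo, with an empty user_label_prefix, this builds the artificial
   read-only pointer variable "*__imp_foo" (or "*.refptr.foo" when BEIMPORT
   is false) whose DECL_RTL is a const mem loading the import-table slot.  */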
13651 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
13652 htab_t dllimport_map;
13654 static tree
13655 get_dllimport_decl (tree decl, bool beimport)
13657 struct tree_map *h, in;
13658 void **loc;
13659 const char *name;
13660 const char *prefix;
13661 size_t namelen, prefixlen;
13662 char *imp_name;
13663 tree to;
13664 rtx rtl;
13666 if (!dllimport_map)
13667 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
13669 in.hash = htab_hash_pointer (decl);
13670 in.base.from = decl;
13671 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
13672 h = (struct tree_map *) *loc;
13673 if (h)
13674 return h->to;
13676 *loc = h = ggc_alloc<tree_map> ();
13677 h->hash = in.hash;
13678 h->base.from = decl;
13679 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
13680 VAR_DECL, NULL, ptr_type_node);
13681 DECL_ARTIFICIAL (to) = 1;
13682 DECL_IGNORED_P (to) = 1;
13683 DECL_EXTERNAL (to) = 1;
13684 TREE_READONLY (to) = 1;
13686 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
13687 name = targetm.strip_name_encoding (name);
13688 if (beimport)
13689 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
13690 ? "*__imp_" : "*__imp__";
13691 else
13692 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
13693 namelen = strlen (name);
13694 prefixlen = strlen (prefix);
13695 imp_name = (char *) alloca (namelen + prefixlen + 1);
13696 memcpy (imp_name, prefix, prefixlen);
13697 memcpy (imp_name + prefixlen, name, namelen + 1);
13699 name = ggc_alloc_string (imp_name, namelen + prefixlen);
13700 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
13701 SET_SYMBOL_REF_DECL (rtl, to);
13702 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
13703 if (!beimport)
13705 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
13706 #ifdef SUB_TARGET_RECORD_STUB
13707 SUB_TARGET_RECORD_STUB (name);
13708 #endif
13711 rtl = gen_const_mem (Pmode, rtl);
13712 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
13714 SET_DECL_RTL (to, rtl);
13715 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
13717 return to;
13720 /* Expand SYMBOL into its corresponding far-address symbol.
13721 WANT_REG is true if we require the result to be a register. */
13723 static rtx
13724 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
13726 tree imp_decl;
13727 rtx x;
13729 gcc_assert (SYMBOL_REF_DECL (symbol));
13730 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
13732 x = DECL_RTL (imp_decl);
13733 if (want_reg)
13734 x = force_reg (Pmode, x);
13735 return x;
13738 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
13739 true if we require the result to be a register. */
13741 static rtx
13742 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
13744 tree imp_decl;
13745 rtx x;
13747 gcc_assert (SYMBOL_REF_DECL (symbol));
13748 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
13750 x = DECL_RTL (imp_decl);
13751 if (want_reg)
13752 x = force_reg (Pmode, x);
13753 return x;
13756 /* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG
13757 is true if we require the result to be a register. */
13759 static rtx
13760 legitimize_pe_coff_symbol (rtx addr, bool inreg)
13762 if (!TARGET_PECOFF)
13763 return NULL_RTX;
13765 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13767 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
13768 return legitimize_dllimport_symbol (addr, inreg);
13769 if (GET_CODE (addr) == CONST
13770 && GET_CODE (XEXP (addr, 0)) == PLUS
13771 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
13772 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
13774 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
13775 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
13779 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
13780 return NULL_RTX;
13781 if (GET_CODE (addr) == SYMBOL_REF
13782 && !is_imported_p (addr)
13783 && SYMBOL_REF_EXTERNAL_P (addr)
13784 && SYMBOL_REF_DECL (addr))
13785 return legitimize_pe_coff_extern_decl (addr, inreg);
13787 if (GET_CODE (addr) == CONST
13788 && GET_CODE (XEXP (addr, 0)) == PLUS
13789 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
13790 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
13791 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
13792 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
13794 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
13795 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
13797 return NULL_RTX;
13800 /* Try machine-dependent ways of modifying an illegitimate address
13801 to be legitimate. If we find one, return the new, valid address.
13802 This macro is used in only one place: `memory_address' in explow.c.
13804 OLDX is the address as it was before break_out_memory_refs was called.
13805 In some cases it is useful to look at this to decide what needs to be done.
13807 It is always safe for this macro to do nothing. It exists to recognize
13808 opportunities to optimize the output.
13810 For the 80386, we handle X+REG by loading X into a register R and
13811 using R+REG. R will go in a general reg and indexing will be used.
13812 However, if REG is a broken-out memory address or multiplication,
13813 nothing needs to be done because REG can certainly go in a general reg.
13815 When -fpic is used, special handling is needed for symbolic references.
13816 See comments by legitimize_pic_address in i386.c for details. */
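/* Editorial example (not part of the original source): given
   x = (plus (reg) (ashift (reg) (const_int 3))), the code below rewrites
   the shift into (mult (reg) (const_int 8)) and then swaps the operands
   so the MULT comes first, a form ix86_legitimate_address_p accepts.  */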
13818 static rtx
13819 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
13820 enum machine_mode mode)
13822 int changed = 0;
13823 unsigned log;
13825 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
13826 if (log)
13827 return legitimize_tls_address (x, (enum tls_model) log, false);
13828 if (GET_CODE (x) == CONST
13829 && GET_CODE (XEXP (x, 0)) == PLUS
13830 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
13831 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
13833 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
13834 (enum tls_model) log, false);
13835 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
13838 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13840 rtx tmp = legitimize_pe_coff_symbol (x, true);
13841 if (tmp)
13842 return tmp;
13845 if (flag_pic && SYMBOLIC_CONST (x))
13846 return legitimize_pic_address (x, 0);
13848 #if TARGET_MACHO
13849 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
13850 return machopic_indirect_data_reference (x, 0);
13851 #endif
13853 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
13854 if (GET_CODE (x) == ASHIFT
13855 && CONST_INT_P (XEXP (x, 1))
13856 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
13858 changed = 1;
13859 log = INTVAL (XEXP (x, 1));
13860 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
13861 GEN_INT (1 << log));
13864 if (GET_CODE (x) == PLUS)
13866 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
13868 if (GET_CODE (XEXP (x, 0)) == ASHIFT
13869 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
13870 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
13872 changed = 1;
13873 log = INTVAL (XEXP (XEXP (x, 0), 1));
13874 XEXP (x, 0) = gen_rtx_MULT (Pmode,
13875 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
13876 GEN_INT (1 << log));
13879 if (GET_CODE (XEXP (x, 1)) == ASHIFT
13880 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
13881 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
13883 changed = 1;
13884 log = INTVAL (XEXP (XEXP (x, 1), 1));
13885 XEXP (x, 1) = gen_rtx_MULT (Pmode,
13886 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
13887 GEN_INT (1 << log));
13890 /* Put multiply first if it isn't already. */
13891 if (GET_CODE (XEXP (x, 1)) == MULT)
13893 rtx tmp = XEXP (x, 0);
13894 XEXP (x, 0) = XEXP (x, 1);
13895 XEXP (x, 1) = tmp;
13896 changed = 1;
13899 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
13900 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
13901 created by virtual register instantiation, register elimination, and
13902 similar optimizations. */
13903 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
13905 changed = 1;
13906 x = gen_rtx_PLUS (Pmode,
13907 gen_rtx_PLUS (Pmode, XEXP (x, 0),
13908 XEXP (XEXP (x, 1), 0)),
13909 XEXP (XEXP (x, 1), 1));
13912 /* Canonicalize
13913 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
13914 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
13915 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
13916 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
13917 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
13918 && CONSTANT_P (XEXP (x, 1)))
13920 rtx constant;
13921 rtx other = NULL_RTX;
13923 if (CONST_INT_P (XEXP (x, 1)))
13925 constant = XEXP (x, 1);
13926 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
13928 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
13930 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
13931 other = XEXP (x, 1);
13933 else
13934 constant = 0;
13936 if (constant)
13938 changed = 1;
13939 x = gen_rtx_PLUS (Pmode,
13940 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
13941 XEXP (XEXP (XEXP (x, 0), 1), 0)),
13942 plus_constant (Pmode, other,
13943 INTVAL (constant)));
13947 if (changed && ix86_legitimate_address_p (mode, x, false))
13948 return x;
13950 if (GET_CODE (XEXP (x, 0)) == MULT)
13952 changed = 1;
13953 XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
13956 if (GET_CODE (XEXP (x, 1)) == MULT)
13958 changed = 1;
13959 XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
13962 if (changed
13963 && REG_P (XEXP (x, 1))
13964 && REG_P (XEXP (x, 0)))
13965 return x;
13967 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
13969 changed = 1;
13970 x = legitimize_pic_address (x, 0);
13973 if (changed && ix86_legitimate_address_p (mode, x, false))
13974 return x;
13976 if (REG_P (XEXP (x, 0)))
13978 rtx temp = gen_reg_rtx (Pmode);
13979 rtx val = force_operand (XEXP (x, 1), temp);
13980 if (val != temp)
13982 val = convert_to_mode (Pmode, val, 1);
13983 emit_move_insn (temp, val);
13986 XEXP (x, 1) = temp;
13987 return x;
13990 else if (REG_P (XEXP (x, 1)))
13992 rtx temp = gen_reg_rtx (Pmode);
13993 rtx val = force_operand (XEXP (x, 0), temp);
13994 if (val != temp)
13996 val = convert_to_mode (Pmode, val, 1);
13997 emit_move_insn (temp, val);
14000 XEXP (x, 0) = temp;
14001 return x;
14005 return x;
14008 /* Print an integer constant expression in assembler syntax. Addition
14009 and subtraction are the only arithmetic that may appear in these
14010 expressions. FILE is the stdio stream to write to, X is the rtx, and
14011 CODE is the operand print code from the output string. */
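/* Editorial examples (not part of the original source):
   (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)) prints as
   "foo@GOTOFF", and (unspec [(symbol_ref "foo")] UNSPEC_GOTPCREL) prints
   as "foo@GOTPCREL(%rip)" in AT&T syntax.  */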
14013 static void
14014 output_pic_addr_const (FILE *file, rtx x, int code)
14016 char buf[256];
14018 switch (GET_CODE (x))
14020 case PC:
14021 gcc_assert (flag_pic);
14022 putc ('.', file);
14023 break;
14025 case SYMBOL_REF:
14026 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
14027 output_addr_const (file, x);
14028 else
14030 const char *name = XSTR (x, 0);
14032 /* Mark the decl as referenced so that cgraph will
14033 output the function. */
14034 if (SYMBOL_REF_DECL (x))
14035 mark_decl_referenced (SYMBOL_REF_DECL (x));
14037 #if TARGET_MACHO
14038 if (MACHOPIC_INDIRECT
14039 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
14040 name = machopic_indirection_name (x, /*stub_p=*/true);
14041 #endif
14042 assemble_name (file, name);
14044 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
14045 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
14046 fputs ("@PLT", file);
14047 break;
14049 case LABEL_REF:
14050 x = XEXP (x, 0);
14051 /* FALLTHRU */
14052 case CODE_LABEL:
14053 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
14054 assemble_name (asm_out_file, buf);
14055 break;
14057 case CONST_INT:
14058 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14059 break;
14061 case CONST:
14062 /* This used to output parentheses around the expression,
14063 but that does not work on the 386 (either ATT or BSD assembler). */
14064 output_pic_addr_const (file, XEXP (x, 0), code);
14065 break;
14067 case CONST_DOUBLE:
14068 if (GET_MODE (x) == VOIDmode)
14070 /* We can use %d if the number is <32 bits and positive. */
14071 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
14072 fprintf (file, "0x%lx%08lx",
14073 (unsigned long) CONST_DOUBLE_HIGH (x),
14074 (unsigned long) CONST_DOUBLE_LOW (x));
14075 else
14076 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
14078 else
14079 /* We can't handle floating point constants;
14080 TARGET_PRINT_OPERAND must handle them. */
14081 output_operand_lossage ("floating constant misused");
14082 break;
14084 case PLUS:
14085 /* Some assemblers need integer constants to appear first. */
14086 if (CONST_INT_P (XEXP (x, 0)))
14088 output_pic_addr_const (file, XEXP (x, 0), code);
14089 putc ('+', file);
14090 output_pic_addr_const (file, XEXP (x, 1), code);
14092 else
14094 gcc_assert (CONST_INT_P (XEXP (x, 1)));
14095 output_pic_addr_const (file, XEXP (x, 1), code);
14096 putc ('+', file);
14097 output_pic_addr_const (file, XEXP (x, 0), code);
14099 break;
14101 case MINUS:
14102 if (!TARGET_MACHO)
14103 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
14104 output_pic_addr_const (file, XEXP (x, 0), code);
14105 putc ('-', file);
14106 output_pic_addr_const (file, XEXP (x, 1), code);
14107 if (!TARGET_MACHO)
14108 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
14109 break;
14111 case UNSPEC:
14112 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
14114 bool f = i386_asm_output_addr_const_extra (file, x);
14115 gcc_assert (f);
14116 break;
14119 gcc_assert (XVECLEN (x, 0) == 1);
14120 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
14121 switch (XINT (x, 1))
14123 case UNSPEC_GOT:
14124 fputs ("@GOT", file);
14125 break;
14126 case UNSPEC_GOTOFF:
14127 fputs ("@GOTOFF", file);
14128 break;
14129 case UNSPEC_PLTOFF:
14130 fputs ("@PLTOFF", file);
14131 break;
14132 case UNSPEC_PCREL:
14133 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14134 "(%rip)" : "[rip]", file);
14135 break;
14136 case UNSPEC_GOTPCREL:
14137 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14138 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
14139 break;
14140 case UNSPEC_GOTTPOFF:
14141 /* FIXME: This might be @TPOFF in Sun ld too. */
14142 fputs ("@gottpoff", file);
14143 break;
14144 case UNSPEC_TPOFF:
14145 fputs ("@tpoff", file);
14146 break;
14147 case UNSPEC_NTPOFF:
14148 if (TARGET_64BIT)
14149 fputs ("@tpoff", file);
14150 else
14151 fputs ("@ntpoff", file);
14152 break;
14153 case UNSPEC_DTPOFF:
14154 fputs ("@dtpoff", file);
14155 break;
14156 case UNSPEC_GOTNTPOFF:
14157 if (TARGET_64BIT)
14158 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14159 "@gottpoff(%rip)": "@gottpoff[rip]", file);
14160 else
14161 fputs ("@gotntpoff", file);
14162 break;
14163 case UNSPEC_INDNTPOFF:
14164 fputs ("@indntpoff", file);
14165 break;
14166 #if TARGET_MACHO
14167 case UNSPEC_MACHOPIC_OFFSET:
14168 putc ('-', file);
14169 machopic_output_function_base_name (file);
14170 break;
14171 #endif
14172 default:
14173 output_operand_lossage ("invalid UNSPEC as operand");
14174 break;
14176 break;
14178 default:
14179 output_operand_lossage ("invalid expression as operand");
14183 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
14184 We need to emit DTP-relative relocations. */
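/* Editorial example (not part of the original source), assuming ASM_LONG
   is "\t.long\t": a 4-byte request for symbol foo emits
   "\t.long\tfoo@dtpoff", while an 8-byte request appends a zero upper
   half, "\t.long\tfoo@dtpoff, 0".  */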
14186 static void ATTRIBUTE_UNUSED
14187 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
14189 fputs (ASM_LONG, file);
14190 output_addr_const (file, x);
14191 fputs ("@dtpoff", file);
14192 switch (size)
14194 case 4:
14195 break;
14196 case 8:
14197 fputs (", 0", file);
14198 break;
14199 default:
14200 gcc_unreachable ();
14204 /* Return true if X is a representation of the PIC register. This copes
14205 with calls from ix86_find_base_term, where the register might have
14206 been replaced by a cselib value. */
14208 static bool
14209 ix86_pic_register_p (rtx x)
14211 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
14212 return (pic_offset_table_rtx
14213 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
14214 else
14215 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
14218 /* Helper function for ix86_delegitimize_address.
14219 Attempt to delegitimize TLS local-exec accesses. */
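/* Editorial example (not part of the original source): a local-exec access
   such as %fs:x@tpoff, i.e. a MEM whose address decomposes to the TLS
   segment with displacement (const (unspec [(symbol_ref "x")]
   UNSPEC_NTPOFF)), is mapped back to the plain SYMBOL_REF for x, plus any
   base or index that was present.  */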
14221 static rtx
14222 ix86_delegitimize_tls_address (rtx orig_x)
14224 rtx x = orig_x, unspec;
14225 struct ix86_address addr;
14227 if (!TARGET_TLS_DIRECT_SEG_REFS)
14228 return orig_x;
14229 if (MEM_P (x))
14230 x = XEXP (x, 0);
14231 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
14232 return orig_x;
14233 if (ix86_decompose_address (x, &addr) == 0
14234 || addr.seg != DEFAULT_TLS_SEG_REG
14235 || addr.disp == NULL_RTX
14236 || GET_CODE (addr.disp) != CONST)
14237 return orig_x;
14238 unspec = XEXP (addr.disp, 0);
14239 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
14240 unspec = XEXP (unspec, 0);
14241 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
14242 return orig_x;
14243 x = XVECEXP (unspec, 0, 0);
14244 gcc_assert (GET_CODE (x) == SYMBOL_REF);
14245 if (unspec != XEXP (addr.disp, 0))
14246 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
14247 if (addr.index)
14249 rtx idx = addr.index;
14250 if (addr.scale != 1)
14251 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
14252 x = gen_rtx_PLUS (Pmode, idx, x);
14254 if (addr.base)
14255 x = gen_rtx_PLUS (Pmode, addr.base, x);
14256 if (MEM_P (orig_x))
14257 x = replace_equiv_address_nv (orig_x, x);
14258 return x;
14261 /* In the name of slightly smaller debug output, and to cater to
14262 general assembler lossage, recognize PIC+GOTOFF and turn it back
14263 into a direct symbol reference.
14265 On Darwin, this is necessary to avoid a crash, because Darwin
14266 has a different PIC label for each routine but the DWARF debugging
14267 information is not associated with any particular routine, so it's
14268 necessary to remove references to the PIC label from RTL stored by
14269 the DWARF output code. */
14271 static rtx
14272 ix86_delegitimize_address (rtx x)
14274 rtx orig_x = delegitimize_mem_from_attrs (x);
14275 /* addend is NULL or some rtx if x is something+GOTOFF where
14276 something doesn't include the PIC register. */
14277 rtx addend = NULL_RTX;
14278 /* reg_addend is NULL or a multiple of some register. */
14279 rtx reg_addend = NULL_RTX;
14280 /* const_addend is NULL or a const_int. */
14281 rtx const_addend = NULL_RTX;
14282 /* This is the result, or NULL. */
14283 rtx result = NULL_RTX;
14285 x = orig_x;
14287 if (MEM_P (x))
14288 x = XEXP (x, 0);
14290 if (TARGET_64BIT)
14292 if (GET_CODE (x) == CONST
14293 && GET_CODE (XEXP (x, 0)) == PLUS
14294 && GET_MODE (XEXP (x, 0)) == Pmode
14295 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14296 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
14297 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
14299 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
14300 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
14301 if (MEM_P (orig_x))
14302 x = replace_equiv_address_nv (orig_x, x);
14303 return x;
14306 if (GET_CODE (x) == CONST
14307 && GET_CODE (XEXP (x, 0)) == UNSPEC
14308 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
14309 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
14310 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
14312 x = XVECEXP (XEXP (x, 0), 0, 0);
14313 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
14315 x = simplify_gen_subreg (GET_MODE (orig_x), x,
14316 GET_MODE (x), 0);
14317 if (x == NULL_RTX)
14318 return orig_x;
14320 return x;
14323 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
14324 return ix86_delegitimize_tls_address (orig_x);
14326 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
14327 and -mcmodel=medium -fpic. */
14330 if (GET_CODE (x) != PLUS
14331 || GET_CODE (XEXP (x, 1)) != CONST)
14332 return ix86_delegitimize_tls_address (orig_x);
14334 if (ix86_pic_register_p (XEXP (x, 0)))
14335 /* %ebx + GOT/GOTOFF */
14337 else if (GET_CODE (XEXP (x, 0)) == PLUS)
14339 /* %ebx + %reg * scale + GOT/GOTOFF */
14340 reg_addend = XEXP (x, 0);
14341 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
14342 reg_addend = XEXP (reg_addend, 1);
14343 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
14344 reg_addend = XEXP (reg_addend, 0);
14345 else
14347 reg_addend = NULL_RTX;
14348 addend = XEXP (x, 0);
14351 else
14352 addend = XEXP (x, 0);
14354 x = XEXP (XEXP (x, 1), 0);
14355 if (GET_CODE (x) == PLUS
14356 && CONST_INT_P (XEXP (x, 1)))
14358 const_addend = XEXP (x, 1);
14359 x = XEXP (x, 0);
14362 if (GET_CODE (x) == UNSPEC
14363 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
14364 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
14365 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
14366 && !MEM_P (orig_x) && !addend)))
14367 result = XVECEXP (x, 0, 0);
14369 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
14370 && !MEM_P (orig_x))
14371 result = XVECEXP (x, 0, 0);
14373 if (! result)
14374 return ix86_delegitimize_tls_address (orig_x);
14376 if (const_addend)
14377 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
14378 if (reg_addend)
14379 result = gen_rtx_PLUS (Pmode, reg_addend, result);
14380 if (addend)
14382 /* If the rest of original X doesn't involve the PIC register, add
14383 addend and subtract pic_offset_table_rtx. This can happen e.g.
14384 for code like:
14385 leal (%ebx, %ecx, 4), %ecx
14387 movl foo@GOTOFF(%ecx), %edx
14388 in which case we return (%ecx - %ebx) + foo. */
14389 if (pic_offset_table_rtx)
14390 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
14391 pic_offset_table_rtx),
14392 result);
14393 else
14394 return orig_x;
14396 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
14398 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
14399 if (result == NULL_RTX)
14400 return orig_x;
14402 return result;
14405 /* If X is a machine specific address (i.e. a symbol or label being
14406 referenced as a displacement from the GOT implemented using an
14407 UNSPEC), then return the base term. Otherwise return X. */
14410 ix86_find_base_term (rtx x)
14412 rtx term;
14414 if (TARGET_64BIT)
14416 if (GET_CODE (x) != CONST)
14417 return x;
14418 term = XEXP (x, 0);
14419 if (GET_CODE (term) == PLUS
14420 && (CONST_INT_P (XEXP (term, 1))
14421 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
14422 term = XEXP (term, 0);
14423 if (GET_CODE (term) != UNSPEC
14424 || (XINT (term, 1) != UNSPEC_GOTPCREL
14425 && XINT (term, 1) != UNSPEC_PCREL))
14426 return x;
14428 return XVECEXP (term, 0, 0);
14431 return ix86_delegitimize_address (x);
14434 static void
14435 put_condition_code (enum rtx_code code, enum machine_mode mode, bool reverse,
14436 bool fp, FILE *file)
14438 const char *suffix;
14440 if (mode == CCFPmode || mode == CCFPUmode)
14442 code = ix86_fp_compare_code_to_integer (code);
14443 mode = CCmode;
14445 if (reverse)
14446 code = reverse_condition (code);
14448 switch (code)
14450 case EQ:
14451 switch (mode)
14453 case CCAmode:
14454 suffix = "a";
14455 break;
14457 case CCCmode:
14458 suffix = "c";
14459 break;
14461 case CCOmode:
14462 suffix = "o";
14463 break;
14465 case CCSmode:
14466 suffix = "s";
14467 break;
14469 default:
14470 suffix = "e";
14472 break;
14473 case NE:
14474 switch (mode)
14476 case CCAmode:
14477 suffix = "na";
14478 break;
14480 case CCCmode:
14481 suffix = "nc";
14482 break;
14484 case CCOmode:
14485 suffix = "no";
14486 break;
14488 case CCSmode:
14489 suffix = "ns";
14490 break;
14492 default:
14493 suffix = "ne";
14495 break;
14496 case GT:
14497 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
14498 suffix = "g";
14499 break;
14500 case GTU:
14501 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
14502 Those same assemblers have the same but opposite lossage on cmov. */
14503 if (mode == CCmode)
14504 suffix = fp ? "nbe" : "a";
14505 else
14506 gcc_unreachable ();
14507 break;
14508 case LT:
14509 switch (mode)
14511 case CCNOmode:
14512 case CCGOCmode:
14513 suffix = "s";
14514 break;
14516 case CCmode:
14517 case CCGCmode:
14518 suffix = "l";
14519 break;
14521 default:
14522 gcc_unreachable ();
14524 break;
14525 case LTU:
14526 if (mode == CCmode)
14527 suffix = "b";
14528 else if (mode == CCCmode)
14529 suffix = "c";
14530 else
14531 gcc_unreachable ();
14532 break;
14533 case GE:
14534 switch (mode)
14536 case CCNOmode:
14537 case CCGOCmode:
14538 suffix = "ns";
14539 break;
14541 case CCmode:
14542 case CCGCmode:
14543 suffix = "ge";
14544 break;
14546 default:
14547 gcc_unreachable ();
14549 break;
14550 case GEU:
14551 if (mode == CCmode)
14552 suffix = fp ? "nb" : "ae";
14553 else if (mode == CCCmode)
14554 suffix = "nc";
14555 else
14556 gcc_unreachable ();
14557 break;
14558 case LE:
14559 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
14560 suffix = "le";
14561 break;
14562 case LEU:
14563 if (mode == CCmode)
14564 suffix = "be";
14565 else
14566 gcc_unreachable ();
14567 break;
14568 case UNORDERED:
14569 suffix = fp ? "u" : "p";
14570 break;
14571 case ORDERED:
14572 suffix = fp ? "nu" : "np";
14573 break;
14574 default:
14575 gcc_unreachable ();
14577 fputs (suffix, file);
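/* For example, (EQ, CCmode) yields "e" and (GTU, CCmode) yields "a"
   (or "nbe" when FP is set, for fcmov), while (LT, CCGOCmode) tests only
   the sign flag and yields "s"; with REVERSE the condition is inverted
   before the lookup.  */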
14580 /* Print the name of register X to FILE based on its machine mode and number.
14581 If CODE is 'w', pretend the mode is HImode.
14582 If CODE is 'b', pretend the mode is QImode.
14583 If CODE is 'k', pretend the mode is SImode.
14584 If CODE is 'q', pretend the mode is DImode.
14585 If CODE is 'x', pretend the mode is V4SFmode.
14586 If CODE is 't', pretend the mode is V8SFmode.
14587 If CODE is 'g', pretend the mode is V16SFmode.
14588 If CODE is 'h', pretend the reg is the 'high' byte register.
14589 If CODE is 'y', print "st(0)" instead of "st", if the reg is a stack op.
14590 If CODE is 'd', duplicate the operand for AVX instruction.
14593 void
14594 print_reg (rtx x, int code, FILE *file)
14596 const char *reg;
14597 unsigned int regno;
14598 bool duplicated = code == 'd' && TARGET_AVX;
14600 if (ASSEMBLER_DIALECT == ASM_ATT)
14601 putc ('%', file);
14603 if (x == pc_rtx)
14605 gcc_assert (TARGET_64BIT);
14606 fputs ("rip", file);
14607 return;
14610 regno = true_regnum (x);
14611 gcc_assert (regno != ARG_POINTER_REGNUM
14612 && regno != FRAME_POINTER_REGNUM
14613 && regno != FLAGS_REG
14614 && regno != FPSR_REG
14615 && regno != FPCR_REG);
14617 if (code == 'w' || MMX_REG_P (x))
14618 code = 2;
14619 else if (code == 'b')
14620 code = 1;
14621 else if (code == 'k')
14622 code = 4;
14623 else if (code == 'q')
14624 code = 8;
14625 else if (code == 'y')
14626 code = 3;
14627 else if (code == 'h')
14628 code = 0;
14629 else if (code == 'x')
14630 code = 16;
14631 else if (code == 't')
14632 code = 32;
14633 else if (code == 'g')
14634 code = 64;
14635 else
14636 code = GET_MODE_SIZE (GET_MODE (x));
14638 /* Irritatingly, AMD extended registers use a different naming convention
14639 from the normal registers: "r%d[bwd]" */
14640 if (REX_INT_REGNO_P (regno))
14642 gcc_assert (TARGET_64BIT);
14643 putc ('r', file);
14644 fprint_ul (file, regno - FIRST_REX_INT_REG + 8);
14645 switch (code)
14647 case 0:
14648 error ("extended registers have no high halves");
14649 break;
14650 case 1:
14651 putc ('b', file);
14652 break;
14653 case 2:
14654 putc ('w', file);
14655 break;
14656 case 4:
14657 putc ('d', file);
14658 break;
14659 case 8:
14660 /* no suffix */
14661 break;
14662 default:
14663 error ("unsupported operand size for extended register");
14664 break;
14666 return;
14669 reg = NULL;
14670 switch (code)
14672 case 3:
14673 if (STACK_TOP_P (x))
14675 reg = "st(0)";
14676 break;
14678 /* FALLTHRU */
14679 case 8:
14680 case 4:
14681 case 12:
14682 if (! ANY_FP_REG_P (x))
14683 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
14684 /* FALLTHRU */
14685 case 16:
14686 case 2:
14687 normal:
14688 reg = hi_reg_name[regno];
14689 break;
14690 case 1:
14691 if (regno >= ARRAY_SIZE (qi_reg_name))
14692 goto normal;
14693 reg = qi_reg_name[regno];
14694 break;
14695 case 0:
14696 if (regno >= ARRAY_SIZE (qi_high_reg_name))
14697 goto normal;
14698 reg = qi_high_reg_name[regno];
14699 break;
14700 case 32:
14701 if (SSE_REG_P (x))
14703 gcc_assert (!duplicated);
14704 putc ('y', file);
14705 fputs (hi_reg_name[regno] + 1, file);
14706 return;
14708 case 64:
14709 if (SSE_REG_P (x))
14711 gcc_assert (!duplicated);
14712 putc ('z', file);
14713 fputs (hi_reg_name[REGNO (x)] + 1, file);
14714 return;
14716 break;
14717 default:
14718 gcc_unreachable ();
14721 fputs (reg, file);
14722 if (duplicated)
14724 if (ASSEMBLER_DIALECT == ASM_ATT)
14725 fprintf (file, ", %%%s", reg);
14726 else
14727 fprintf (file, ", %s", reg);
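/* As a rough illustration, in AT&T syntax (reg:SI ax) printed with code 'w'
   comes out as "%ax", code 'h' on the same register gives "%ah", an SSE
   register printed with code 't' gives its "%ymm" name, and the AVX 'd'
   code prints the register twice, e.g. "%xmm2, %xmm2".  */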
14731 /* Locate some local-dynamic symbol still in use by this function
14732 so that we can print its name in some tls_local_dynamic_base
14733 pattern. */
14735 static int
14736 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
14738 rtx x = *px;
14740 if (GET_CODE (x) == SYMBOL_REF
14741 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
14743 cfun->machine->some_ld_name = XSTR (x, 0);
14744 return 1;
14747 return 0;
14750 static const char *
14751 get_some_local_dynamic_name (void)
14753 rtx insn;
14755 if (cfun->machine->some_ld_name)
14756 return cfun->machine->some_ld_name;
14758 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
14759 if (NONDEBUG_INSN_P (insn)
14760 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
14761 return cfun->machine->some_ld_name;
14763 return NULL;
14766 /* Meaning of CODE:
14767 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
14768 C -- print opcode suffix for set/cmov insn.
14769 c -- like C, but print reversed condition
14770 F,f -- likewise, but for floating-point.
14771 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
14772 otherwise nothing
14773 R -- print embedded rounding and sae.
14774 r -- print only sae.
14775 z -- print the opcode suffix for the size of the current operand.
14776 Z -- likewise, with special suffixes for x87 instructions.
14777 * -- print a star (in certain assembler syntax)
14778 A -- print an absolute memory reference.
14779 E -- print address with DImode register names if TARGET_64BIT.
14780 w -- print the operand as if it's a "word" (HImode) even if it isn't.
14781 s -- print a shift double count, followed by the assembler's argument
14782 delimiter.
14783 b -- print the QImode name of the register for the indicated operand.
14784 %b0 would print %al if operands[0] is reg 0.
14785 w -- likewise, print the HImode name of the register.
14786 k -- likewise, print the SImode name of the register.
14787 q -- likewise, print the DImode name of the register.
14788 x -- likewise, print the V4SFmode name of the register.
14789 t -- likewise, print the V8SFmode name of the register.
14790 g -- likewise, print the V16SFmode name of the register.
14791 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
14792 y -- print "st(0)" instead of "st" as a register.
14793 d -- print duplicated register operand for AVX instruction.
14794 D -- print condition for SSE cmp instruction.
14795 P -- if PIC, print an @PLT suffix.
14796 p -- print raw symbol name.
14797 X -- don't print any sort of PIC '@' suffix for a symbol.
14798 & -- print some in-use local-dynamic symbol name.
14799 H -- print a memory address offset by 8; used for sse high-parts
14800 Y -- print condition for XOP pcom* instruction.
14801 + -- print a branch hint as 'cs' or 'ds' prefix
14802 ; -- print a semicolon (after prefixes due to bug in older gas).
14803 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
14804 @ -- print a segment register of thread base pointer load
14805 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
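   For instance, "fisttp%Z0\t%0" (used by output_fix_trunc later in this
   file) emits fisttps/fisttpl/fisttpq according to the mode of operand 0,
   and a leading "%^" in a template adds the "addr32 " prefix on x32,
   i.e. TARGET_64BIT with a 32-bit Pmode.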
14808 void
14809 ix86_print_operand (FILE *file, rtx x, int code)
14811 if (code)
14813 switch (code)
14815 case 'A':
14816 switch (ASSEMBLER_DIALECT)
14818 case ASM_ATT:
14819 putc ('*', file);
14820 break;
14822 case ASM_INTEL:
14823 /* Intel syntax. For absolute addresses, registers should not
14824 be surrounded by braces. */
14825 if (!REG_P (x))
14827 putc ('[', file);
14828 ix86_print_operand (file, x, 0);
14829 putc (']', file);
14830 return;
14832 break;
14834 default:
14835 gcc_unreachable ();
14838 ix86_print_operand (file, x, 0);
14839 return;
14841 case 'E':
14842 /* Wrap address in an UNSPEC to declare special handling. */
14843 if (TARGET_64BIT)
14844 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
14846 output_address (x);
14847 return;
14849 case 'L':
14850 if (ASSEMBLER_DIALECT == ASM_ATT)
14851 putc ('l', file);
14852 return;
14854 case 'W':
14855 if (ASSEMBLER_DIALECT == ASM_ATT)
14856 putc ('w', file);
14857 return;
14859 case 'B':
14860 if (ASSEMBLER_DIALECT == ASM_ATT)
14861 putc ('b', file);
14862 return;
14864 case 'Q':
14865 if (ASSEMBLER_DIALECT == ASM_ATT)
14866 putc ('l', file);
14867 return;
14869 case 'S':
14870 if (ASSEMBLER_DIALECT == ASM_ATT)
14871 putc ('s', file);
14872 return;
14874 case 'T':
14875 if (ASSEMBLER_DIALECT == ASM_ATT)
14876 putc ('t', file);
14877 return;
14879 case 'O':
14880 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14881 if (ASSEMBLER_DIALECT != ASM_ATT)
14882 return;
14884 switch (GET_MODE_SIZE (GET_MODE (x)))
14886 case 2:
14887 putc ('w', file);
14888 break;
14890 case 4:
14891 putc ('l', file);
14892 break;
14894 case 8:
14895 putc ('q', file);
14896 break;
14898 default:
14899 output_operand_lossage
14900 ("invalid operand size for operand code 'O'");
14901 return;
14904 putc ('.', file);
14905 #endif
14906 return;
14908 case 'z':
14909 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14911 /* Opcodes don't get size suffixes if using Intel opcodes. */
14912 if (ASSEMBLER_DIALECT == ASM_INTEL)
14913 return;
14915 switch (GET_MODE_SIZE (GET_MODE (x)))
14917 case 1:
14918 putc ('b', file);
14919 return;
14921 case 2:
14922 putc ('w', file);
14923 return;
14925 case 4:
14926 putc ('l', file);
14927 return;
14929 case 8:
14930 putc ('q', file);
14931 return;
14933 default:
14934 output_operand_lossage
14935 ("invalid operand size for operand code 'z'");
14936 return;
14940 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14941 warning
14942 (0, "non-integer operand used with operand code 'z'");
14943 /* FALLTHRU */
14945 case 'Z':
14946 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
14947 if (ASSEMBLER_DIALECT == ASM_INTEL)
14948 return;
14950 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14952 switch (GET_MODE_SIZE (GET_MODE (x)))
14954 case 2:
14955 #ifdef HAVE_AS_IX86_FILDS
14956 putc ('s', file);
14957 #endif
14958 return;
14960 case 4:
14961 putc ('l', file);
14962 return;
14964 case 8:
14965 #ifdef HAVE_AS_IX86_FILDQ
14966 putc ('q', file);
14967 #else
14968 fputs ("ll", file);
14969 #endif
14970 return;
14972 default:
14973 break;
14976 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14978 /* 387 opcodes don't get size suffixes
14979 if the operands are registers. */
14980 if (STACK_REG_P (x))
14981 return;
14983 switch (GET_MODE_SIZE (GET_MODE (x)))
14985 case 4:
14986 putc ('s', file);
14987 return;
14989 case 8:
14990 putc ('l', file);
14991 return;
14993 case 12:
14994 case 16:
14995 putc ('t', file);
14996 return;
14998 default:
14999 break;
15002 else
15004 output_operand_lossage
15005 ("invalid operand type used with operand code 'Z'");
15006 return;
15009 output_operand_lossage
15010 ("invalid operand size for operand code 'Z'");
15011 return;
15013 case 'd':
15014 case 'b':
15015 case 'w':
15016 case 'k':
15017 case 'q':
15018 case 'h':
15019 case 't':
15020 case 'g':
15021 case 'y':
15022 case 'x':
15023 case 'X':
15024 case 'P':
15025 case 'p':
15026 break;
15028 case 's':
15029 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
15031 ix86_print_operand (file, x, 0);
15032 fputs (", ", file);
15034 return;
15036 case 'Y':
15037 switch (GET_CODE (x))
15039 case NE:
15040 fputs ("neq", file);
15041 break;
15042 case EQ:
15043 fputs ("eq", file);
15044 break;
15045 case GE:
15046 case GEU:
15047 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
15048 break;
15049 case GT:
15050 case GTU:
15051 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
15052 break;
15053 case LE:
15054 case LEU:
15055 fputs ("le", file);
15056 break;
15057 case LT:
15058 case LTU:
15059 fputs ("lt", file);
15060 break;
15061 case UNORDERED:
15062 fputs ("unord", file);
15063 break;
15064 case ORDERED:
15065 fputs ("ord", file);
15066 break;
15067 case UNEQ:
15068 fputs ("ueq", file);
15069 break;
15070 case UNGE:
15071 fputs ("nlt", file);
15072 break;
15073 case UNGT:
15074 fputs ("nle", file);
15075 break;
15076 case UNLE:
15077 fputs ("ule", file);
15078 break;
15079 case UNLT:
15080 fputs ("ult", file);
15081 break;
15082 case LTGT:
15083 fputs ("une", file);
15084 break;
15085 default:
15086 output_operand_lossage ("operand is not a condition code, "
15087 "invalid operand code 'Y'");
15088 return;
15090 return;
15092 case 'D':
15093 /* Little bit of braindamage here. The SSE compare instructions
15094 use completely different names for the comparisons than the
15095 fp conditional moves do. */
15096 switch (GET_CODE (x))
15098 case UNEQ:
15099 if (TARGET_AVX)
15101 fputs ("eq_us", file);
15102 break;
15104 case EQ:
15105 fputs ("eq", file);
15106 break;
15107 case UNLT:
15108 if (TARGET_AVX)
15110 fputs ("nge", file);
15111 break;
15113 case LT:
15114 fputs ("lt", file);
15115 break;
15116 case UNLE:
15117 if (TARGET_AVX)
15119 fputs ("ngt", file);
15120 break;
15122 case LE:
15123 fputs ("le", file);
15124 break;
15125 case UNORDERED:
15126 fputs ("unord", file);
15127 break;
15128 case LTGT:
15129 if (TARGET_AVX)
15131 fputs ("neq_oq", file);
15132 break;
15134 case NE:
15135 fputs ("neq", file);
15136 break;
15137 case GE:
15138 if (TARGET_AVX)
15140 fputs ("ge", file);
15141 break;
15143 case UNGE:
15144 fputs ("nlt", file);
15145 break;
15146 case GT:
15147 if (TARGET_AVX)
15149 fputs ("gt", file);
15150 break;
15152 case UNGT:
15153 fputs ("nle", file);
15154 break;
15155 case ORDERED:
15156 fputs ("ord", file);
15157 break;
15158 default:
15159 output_operand_lossage ("operand is not a condition code, "
15160 "invalid operand code 'D'");
15161 return;
15163 return;
15165 case 'F':
15166 case 'f':
15167 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15168 if (ASSEMBLER_DIALECT == ASM_ATT)
15169 putc ('.', file);
15170 #endif
15172 case 'C':
15173 case 'c':
15174 if (!COMPARISON_P (x))
15176 output_operand_lossage ("operand is not a condition code, "
15177 "invalid operand code '%c'", code);
15178 return;
15180 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
15181 code == 'c' || code == 'f',
15182 code == 'F' || code == 'f',
15183 file);
15184 return;
15186 case 'H':
15187 if (!offsettable_memref_p (x))
15189 output_operand_lossage ("operand is not an offsettable memory "
15190 "reference, invalid operand code 'H'");
15191 return;
15193 /* It doesn't actually matter what mode we use here, as we're
15194 only going to use this for printing. */
15195 x = adjust_address_nv (x, DImode, 8);
15196 /* Output 'qword ptr' for intel assembler dialect. */
15197 if (ASSEMBLER_DIALECT == ASM_INTEL)
15198 code = 'q';
15199 break;
15201 case 'K':
15202 gcc_assert (CONST_INT_P (x));
15204 if (INTVAL (x) & IX86_HLE_ACQUIRE)
15205 #ifdef HAVE_AS_IX86_HLE
15206 fputs ("xacquire ", file);
15207 #else
15208 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
15209 #endif
15210 else if (INTVAL (x) & IX86_HLE_RELEASE)
15211 #ifdef HAVE_AS_IX86_HLE
15212 fputs ("xrelease ", file);
15213 #else
15214 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
15215 #endif
15216 /* We do not want to print the value of the operand. */
15217 return;
15219 case 'N':
15220 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
15221 fputs ("{z}", file);
15222 return;
15224 case 'r':
15225 gcc_assert (CONST_INT_P (x));
15226 gcc_assert (INTVAL (x) == ROUND_SAE);
15228 if (ASSEMBLER_DIALECT == ASM_INTEL)
15229 fputs (", ", file);
15231 fputs ("{sae}", file);
15233 if (ASSEMBLER_DIALECT == ASM_ATT)
15234 fputs (", ", file);
15236 return;
15238 case 'R':
15239 gcc_assert (CONST_INT_P (x));
15241 if (ASSEMBLER_DIALECT == ASM_INTEL)
15242 fputs (", ", file);
15244 switch (INTVAL (x))
15246 case ROUND_NEAREST_INT | ROUND_SAE:
15247 fputs ("{rn-sae}", file);
15248 break;
15249 case ROUND_NEG_INF | ROUND_SAE:
15250 fputs ("{rd-sae}", file);
15251 break;
15252 case ROUND_POS_INF | ROUND_SAE:
15253 fputs ("{ru-sae}", file);
15254 break;
15255 case ROUND_ZERO | ROUND_SAE:
15256 fputs ("{rz-sae}", file);
15257 break;
15258 default:
15259 gcc_unreachable ();
15262 if (ASSEMBLER_DIALECT == ASM_ATT)
15263 fputs (", ", file);
15265 return;
15267 case '*':
15268 if (ASSEMBLER_DIALECT == ASM_ATT)
15269 putc ('*', file);
15270 return;
15272 case '&':
15274 const char *name = get_some_local_dynamic_name ();
15275 if (name == NULL)
15276 output_operand_lossage ("'%%&' used without any "
15277 "local dynamic TLS references");
15278 else
15279 assemble_name (file, name);
15280 return;
15283 case '+':
15285 rtx x;
15287 if (!optimize
15288 || optimize_function_for_size_p (cfun)
15289 || !TARGET_BRANCH_PREDICTION_HINTS)
15290 return;
15292 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
15293 if (x)
15295 int pred_val = XINT (x, 0);
15297 if (pred_val < REG_BR_PROB_BASE * 45 / 100
15298 || pred_val > REG_BR_PROB_BASE * 55 / 100)
15300 bool taken = pred_val > REG_BR_PROB_BASE / 2;
15301 bool cputaken
15302 = final_forward_branch_p (current_output_insn) == 0;
15304 /* Emit hints only in the case default branch prediction
15305 heuristics would fail. */
15306 if (taken != cputaken)
15308 /* We use 3e (DS) prefix for taken branches and
15309 2e (CS) prefix for not taken branches. */
15310 if (taken)
15311 fputs ("ds ; ", file);
15312 else
15313 fputs ("cs ; ", file);
15317 return;
15320 case ';':
15321 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
15322 putc (';', file);
15323 #endif
15324 return;
15326 case '@':
15327 if (ASSEMBLER_DIALECT == ASM_ATT)
15328 putc ('%', file);
15330 /* The kernel uses a different segment register for performance
15331 reasons; a system call would not have to trash the userspace
15332 segment register, which would be expensive. */
15333 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
15334 fputs ("fs", file);
15335 else
15336 fputs ("gs", file);
15337 return;
15339 case '~':
15340 putc (TARGET_AVX2 ? 'i' : 'f', file);
15341 return;
15343 case '^':
15344 if (TARGET_64BIT && Pmode != word_mode)
15345 fputs ("addr32 ", file);
15346 return;
15348 default:
15349 output_operand_lossage ("invalid operand code '%c'", code);
15353 if (REG_P (x))
15354 print_reg (x, code, file);
15356 else if (MEM_P (x))
15358 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
15359 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
15360 && GET_MODE (x) != BLKmode)
15362 const char * size;
15363 switch (GET_MODE_SIZE (GET_MODE (x)))
15365 case 1: size = "BYTE"; break;
15366 case 2: size = "WORD"; break;
15367 case 4: size = "DWORD"; break;
15368 case 8: size = "QWORD"; break;
15369 case 12: size = "TBYTE"; break;
15370 case 16:
15371 if (GET_MODE (x) == XFmode)
15372 size = "TBYTE";
15373 else
15374 size = "XMMWORD";
15375 break;
15376 case 32: size = "YMMWORD"; break;
15377 case 64: size = "ZMMWORD"; break;
15378 default:
15379 gcc_unreachable ();
15382 /* Check for explicit size override (codes 'b', 'w', 'k',
15383 'q' and 'x') */
15384 if (code == 'b')
15385 size = "BYTE";
15386 else if (code == 'w')
15387 size = "WORD";
15388 else if (code == 'k')
15389 size = "DWORD";
15390 else if (code == 'q')
15391 size = "QWORD";
15392 else if (code == 'x')
15393 size = "XMMWORD";
15395 fputs (size, file);
15396 fputs (" PTR ", file);
15399 x = XEXP (x, 0);
15400 /* Avoid (%rip) for call operands. */
15401 if (CONSTANT_ADDRESS_P (x) && code == 'P'
15402 && !CONST_INT_P (x))
15403 output_addr_const (file, x);
15404 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
15405 output_operand_lossage ("invalid constraints for operand");
15406 else
15407 output_address (x);
15410 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
15412 REAL_VALUE_TYPE r;
15413 long l;
15415 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15416 REAL_VALUE_TO_TARGET_SINGLE (r, l);
15418 if (ASSEMBLER_DIALECT == ASM_ATT)
15419 putc ('$', file);
15420 /* Sign extend 32bit SFmode immediate to 8 bytes. */
15421 if (code == 'q')
15422 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
15423 (unsigned long long) (int) l);
15424 else
15425 fprintf (file, "0x%08x", (unsigned int) l);
15428 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
15430 REAL_VALUE_TYPE r;
15431 long l[2];
15433 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15434 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
15436 if (ASSEMBLER_DIALECT == ASM_ATT)
15437 putc ('$', file);
15438 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
15441 /* These float cases don't actually occur as immediate operands. */
15442 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
15444 char dstr[30];
15446 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
15447 fputs (dstr, file);
15450 else
15452 /* We have patterns that allow zero sets of memory, for instance.
15453 In 64-bit mode, we should probably support all 8-byte vectors,
15454 since we can in fact encode that into an immediate. */
15455 if (GET_CODE (x) == CONST_VECTOR)
15457 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
15458 x = const0_rtx;
15461 if (code != 'P' && code != 'p')
15463 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
15465 if (ASSEMBLER_DIALECT == ASM_ATT)
15466 putc ('$', file);
15468 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
15469 || GET_CODE (x) == LABEL_REF)
15471 if (ASSEMBLER_DIALECT == ASM_ATT)
15472 putc ('$', file);
15473 else
15474 fputs ("OFFSET FLAT:", file);
15477 if (CONST_INT_P (x))
15478 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
15479 else if (flag_pic || MACHOPIC_INDIRECT)
15480 output_pic_addr_const (file, x, code);
15481 else
15482 output_addr_const (file, x);
15486 static bool
15487 ix86_print_operand_punct_valid_p (unsigned char code)
15489 return (code == '@' || code == '*' || code == '+' || code == '&'
15490 || code == ';' || code == '~' || code == '^');
15493 /* Print a memory operand whose address is ADDR. */
15495 static void
15496 ix86_print_operand_address (FILE *file, rtx addr)
15498 struct ix86_address parts;
15499 rtx base, index, disp;
15500 int scale;
15501 int ok;
15502 bool vsib = false;
15503 int code = 0;
15505 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
15507 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15508 gcc_assert (parts.index == NULL_RTX);
15509 parts.index = XVECEXP (addr, 0, 1);
15510 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
15511 addr = XVECEXP (addr, 0, 0);
15512 vsib = true;
15514 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
15516 gcc_assert (TARGET_64BIT);
15517 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15518 code = 'q';
15520 else
15521 ok = ix86_decompose_address (addr, &parts);
15523 gcc_assert (ok);
15525 base = parts.base;
15526 index = parts.index;
15527 disp = parts.disp;
15528 scale = parts.scale;
15530 switch (parts.seg)
15532 case SEG_DEFAULT:
15533 break;
15534 case SEG_FS:
15535 case SEG_GS:
15536 if (ASSEMBLER_DIALECT == ASM_ATT)
15537 putc ('%', file);
15538 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
15539 break;
15540 default:
15541 gcc_unreachable ();
15544 /* Use one byte shorter RIP relative addressing for 64bit mode. */
15545 if (TARGET_64BIT && !base && !index)
15547 rtx symbol = disp;
15549 if (GET_CODE (disp) == CONST
15550 && GET_CODE (XEXP (disp, 0)) == PLUS
15551 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
15552 symbol = XEXP (XEXP (disp, 0), 0);
15554 if (GET_CODE (symbol) == LABEL_REF
15555 || (GET_CODE (symbol) == SYMBOL_REF
15556 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
15557 base = pc_rtx;
15559 if (!base && !index)
15561 /* Displacement only requires special attention. */
15563 if (CONST_INT_P (disp))
15565 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
15566 fputs ("ds:", file);
15567 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
15569 else if (flag_pic)
15570 output_pic_addr_const (file, disp, 0);
15571 else
15572 output_addr_const (file, disp);
15574 else
15576 /* Print SImode register names to force addr32 prefix. */
15577 if (SImode_address_operand (addr, VOIDmode))
15579 #ifdef ENABLE_CHECKING
15580 gcc_assert (TARGET_64BIT);
15581 switch (GET_CODE (addr))
15583 case SUBREG:
15584 gcc_assert (GET_MODE (addr) == SImode);
15585 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
15586 break;
15587 case ZERO_EXTEND:
15588 case AND:
15589 gcc_assert (GET_MODE (addr) == DImode);
15590 break;
15591 default:
15592 gcc_unreachable ();
15594 #endif
15595 gcc_assert (!code);
15596 code = 'k';
15598 else if (code == 0
15599 && TARGET_X32
15600 && disp
15601 && CONST_INT_P (disp)
15602 && INTVAL (disp) < -16*1024*1024)
15604 /* X32 runs in 64-bit mode, where displacement, DISP, in
15605 address DISP(%r64), is encoded as 32-bit immediate sign-
15606 extended from 32-bit to 64-bit. For -0x40000300(%r64),
15607 address is %r64 + 0xffffffffbffffd00. When %r64 <
15608 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
15609 which is invalid for x32. The correct address is %r64
15610 - 0x40000300 == 0xf7ffdd64. To properly encode
15611 -0x40000300(%r64) for x32, we zero-extend negative
15612 displacement by forcing addr32 prefix which truncates
15613 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
15614 zero-extend all negative displacements, including -1(%rsp).
15615 However, for small negative displacements, sign-extension
15616 won't cause overflow. We only zero-extend negative
15617 displacements if they < -16*1024*1024, which is also used
15618 to check legitimate address displacements for PIC. */
15619 code = 'k';
15622 if (ASSEMBLER_DIALECT == ASM_ATT)
15624 if (disp)
15626 if (flag_pic)
15627 output_pic_addr_const (file, disp, 0);
15628 else if (GET_CODE (disp) == LABEL_REF)
15629 output_asm_label (disp);
15630 else
15631 output_addr_const (file, disp);
15634 putc ('(', file);
15635 if (base)
15636 print_reg (base, code, file);
15637 if (index)
15639 putc (',', file);
15640 print_reg (index, vsib ? 0 : code, file);
15641 if (scale != 1 || vsib)
15642 fprintf (file, ",%d", scale);
15644 putc (')', file);
15646 else
15648 rtx offset = NULL_RTX;
15650 if (disp)
15652 /* Pull out the offset of a symbol; print any symbol itself. */
15653 if (GET_CODE (disp) == CONST
15654 && GET_CODE (XEXP (disp, 0)) == PLUS
15655 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
15657 offset = XEXP (XEXP (disp, 0), 1);
15658 disp = gen_rtx_CONST (VOIDmode,
15659 XEXP (XEXP (disp, 0), 0));
15662 if (flag_pic)
15663 output_pic_addr_const (file, disp, 0);
15664 else if (GET_CODE (disp) == LABEL_REF)
15665 output_asm_label (disp);
15666 else if (CONST_INT_P (disp))
15667 offset = disp;
15668 else
15669 output_addr_const (file, disp);
15672 putc ('[', file);
15673 if (base)
15675 print_reg (base, code, file);
15676 if (offset)
15678 if (INTVAL (offset) >= 0)
15679 putc ('+', file);
15680 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
15683 else if (offset)
15684 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
15685 else
15686 putc ('0', file);
15688 if (index)
15690 putc ('+', file);
15691 print_reg (index, vsib ? 0 : code, file);
15692 if (scale != 1 || vsib)
15693 fprintf (file, "*%d", scale);
15695 putc (']', file);
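/* So a full address of the form base + index*scale + disp is printed as
   "disp(%base,%index,scale)" under ASM_ATT and as "[base+disp+index*scale]"
   under ASM_INTEL, while a bare constant displacement gets a "ds:" prefix
   in Intel syntax.  */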
15700 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
15702 static bool
15703 i386_asm_output_addr_const_extra (FILE *file, rtx x)
15705 rtx op;
15707 if (GET_CODE (x) != UNSPEC)
15708 return false;
15710 op = XVECEXP (x, 0, 0);
15711 switch (XINT (x, 1))
15713 case UNSPEC_GOTTPOFF:
15714 output_addr_const (file, op);
15715 /* FIXME: This might be @TPOFF in Sun ld. */
15716 fputs ("@gottpoff", file);
15717 break;
15718 case UNSPEC_TPOFF:
15719 output_addr_const (file, op);
15720 fputs ("@tpoff", file);
15721 break;
15722 case UNSPEC_NTPOFF:
15723 output_addr_const (file, op);
15724 if (TARGET_64BIT)
15725 fputs ("@tpoff", file);
15726 else
15727 fputs ("@ntpoff", file);
15728 break;
15729 case UNSPEC_DTPOFF:
15730 output_addr_const (file, op);
15731 fputs ("@dtpoff", file);
15732 break;
15733 case UNSPEC_GOTNTPOFF:
15734 output_addr_const (file, op);
15735 if (TARGET_64BIT)
15736 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
15737 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
15738 else
15739 fputs ("@gotntpoff", file);
15740 break;
15741 case UNSPEC_INDNTPOFF:
15742 output_addr_const (file, op);
15743 fputs ("@indntpoff", file);
15744 break;
15745 #if TARGET_MACHO
15746 case UNSPEC_MACHOPIC_OFFSET:
15747 output_addr_const (file, op);
15748 putc ('-', file);
15749 machopic_output_function_base_name (file);
15750 break;
15751 #endif
15753 case UNSPEC_STACK_CHECK:
15755 int offset;
15757 gcc_assert (flag_split_stack);
15759 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
15760 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
15761 #else
15762 gcc_unreachable ();
15763 #endif
15765 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
15767 break;
15769 default:
15770 return false;
15773 return true;
15776 /* Split one or more double-mode RTL references into pairs of half-mode
15777 references. The RTL can be REG, offsettable MEM, integer constant, or
15778 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
15779 split and "num" is its length. lo_half and hi_half are output arrays
15780 that parallel "operands". */
15782 void
15783 split_double_mode (enum machine_mode mode, rtx operands[],
15784 int num, rtx lo_half[], rtx hi_half[])
15786 enum machine_mode half_mode;
15787 unsigned int byte;
15789 switch (mode)
15791 case TImode:
15792 half_mode = DImode;
15793 break;
15794 case DImode:
15795 half_mode = SImode;
15796 break;
15797 default:
15798 gcc_unreachable ();
15801 byte = GET_MODE_SIZE (half_mode);
15803 while (num--)
15805 rtx op = operands[num];
15807 /* simplify_subreg refuses to split volatile memory addresses,
15808 but we still have to handle them. */
15809 if (MEM_P (op))
15811 lo_half[num] = adjust_address (op, half_mode, 0);
15812 hi_half[num] = adjust_address (op, half_mode, byte);
15814 else
15816 lo_half[num] = simplify_gen_subreg (half_mode, op,
15817 GET_MODE (op) == VOIDmode
15818 ? mode : GET_MODE (op), 0);
15819 hi_half[num] = simplify_gen_subreg (half_mode, op,
15820 GET_MODE (op) == VOIDmode
15821 ? mode : GET_MODE (op), byte);
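/* For example, splitting a DImode memory operand (mem:DI addr) yields
   lo_half = (mem:SI addr) and hi_half = (mem:SI (addr + 4)), while DImode
   registers and constants are split via simplify_gen_subreg at byte
   offsets 0 and 4.  */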
15826 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
15827 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
15828 is the expression of the binary operation. The output may either be
15829 emitted here, or returned to the caller, like all output_* functions.
15831 There is no guarantee that the operands are the same mode, as they
15832 might be within FLOAT or FLOAT_EXTEND expressions. */
15834 #ifndef SYSV386_COMPAT
15835 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
15836 wants to fix the assemblers because that causes incompatibility
15837 with gcc. No-one wants to fix gcc because that causes
15838 incompatibility with assemblers... You can use the option of
15839 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
15840 #define SYSV386_COMPAT 1
15841 #endif
15843 const char *
15844 output_387_binary_op (rtx insn, rtx *operands)
15846 static char buf[40];
15847 const char *p;
15848 const char *ssep;
15849 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
15851 #ifdef ENABLE_CHECKING
15852 /* Even if we do not want to check the inputs, this documents input
15853 constraints. Which helps in understanding the following code. */
15854 if (STACK_REG_P (operands[0])
15855 && ((REG_P (operands[1])
15856 && REGNO (operands[0]) == REGNO (operands[1])
15857 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
15858 || (REG_P (operands[2])
15859 && REGNO (operands[0]) == REGNO (operands[2])
15860 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
15861 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
15862 ; /* ok */
15863 else
15864 gcc_assert (is_sse);
15865 #endif
15867 switch (GET_CODE (operands[3]))
15869 case PLUS:
15870 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15871 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15872 p = "fiadd";
15873 else
15874 p = "fadd";
15875 ssep = "vadd";
15876 break;
15878 case MINUS:
15879 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15880 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15881 p = "fisub";
15882 else
15883 p = "fsub";
15884 ssep = "vsub";
15885 break;
15887 case MULT:
15888 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15889 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15890 p = "fimul";
15891 else
15892 p = "fmul";
15893 ssep = "vmul";
15894 break;
15896 case DIV:
15897 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15898 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15899 p = "fidiv";
15900 else
15901 p = "fdiv";
15902 ssep = "vdiv";
15903 break;
15905 default:
15906 gcc_unreachable ();
15909 if (is_sse)
15911 if (TARGET_AVX)
15913 strcpy (buf, ssep);
15914 if (GET_MODE (operands[0]) == SFmode)
15915 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
15916 else
15917 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
15919 else
15921 strcpy (buf, ssep + 1);
15922 if (GET_MODE (operands[0]) == SFmode)
15923 strcat (buf, "ss\t{%2, %0|%0, %2}");
15924 else
15925 strcat (buf, "sd\t{%2, %0|%0, %2}");
15927 return buf;
15929 strcpy (buf, p);
15931 switch (GET_CODE (operands[3]))
15933 case MULT:
15934 case PLUS:
15935 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
15937 rtx temp = operands[2];
15938 operands[2] = operands[1];
15939 operands[1] = temp;
15942 /* We know operands[0] == operands[1]. */
15944 if (MEM_P (operands[2]))
15946 p = "%Z2\t%2";
15947 break;
15950 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
15952 if (STACK_TOP_P (operands[0]))
15953 /* How is it that we are storing to a dead operand[2]?
15954 Well, presumably operands[1] is dead too. We can't
15955 store the result to st(0) as st(0) gets popped on this
15956 instruction. Instead store to operands[2] (which I
15957 think has to be st(1)). st(1) will be popped later.
15958 gcc <= 2.8.1 didn't have this check and generated
15959 assembly code that the Unixware assembler rejected. */
15960 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15961 else
15962 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15963 break;
15966 if (STACK_TOP_P (operands[0]))
15967 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15968 else
15969 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15970 break;
15972 case MINUS:
15973 case DIV:
15974 if (MEM_P (operands[1]))
15976 p = "r%Z1\t%1";
15977 break;
15980 if (MEM_P (operands[2]))
15982 p = "%Z2\t%2";
15983 break;
15986 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
15988 #if SYSV386_COMPAT
15989 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
15990 derived assemblers, confusingly reverse the direction of
15991 the operation for fsub{r} and fdiv{r} when the
15992 destination register is not st(0). The Intel assembler
15993 doesn't have this brain damage. Read !SYSV386_COMPAT to
15994 figure out what the hardware really does. */
15995 if (STACK_TOP_P (operands[0]))
15996 p = "{p\t%0, %2|rp\t%2, %0}";
15997 else
15998 p = "{rp\t%2, %0|p\t%0, %2}";
15999 #else
16000 if (STACK_TOP_P (operands[0]))
16001 /* As above for fmul/fadd, we can't store to st(0). */
16002 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
16003 else
16004 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16005 #endif
16006 break;
16009 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
16011 #if SYSV386_COMPAT
16012 if (STACK_TOP_P (operands[0]))
16013 p = "{rp\t%0, %1|p\t%1, %0}";
16014 else
16015 p = "{p\t%1, %0|rp\t%0, %1}";
16016 #else
16017 if (STACK_TOP_P (operands[0]))
16018 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
16019 else
16020 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
16021 #endif
16022 break;
16025 if (STACK_TOP_P (operands[0]))
16027 if (STACK_TOP_P (operands[1]))
16028 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16029 else
16030 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
16031 break;
16033 else if (STACK_TOP_P (operands[1]))
16035 #if SYSV386_COMPAT
16036 p = "{\t%1, %0|r\t%0, %1}";
16037 #else
16038 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
16039 #endif
16041 else
16043 #if SYSV386_COMPAT
16044 p = "{r\t%2, %0|\t%0, %2}";
16045 #else
16046 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16047 #endif
16049 break;
16051 default:
16052 gcc_unreachable ();
16055 strcat (buf, p);
16056 return buf;
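/* For a scalar DFmode add this returns "vaddsd\t{%2, %1, %0|%0, %1, %2}"
   under AVX and "addsd\t{%2, %0|%0, %2}" for plain SSE; the x87 branches
   above choose between the plain, popping and reversed forms (fadd,
   faddp, fsubr, ...) depending on which stack operand dies.  */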
16059 /* Check if a 256bit AVX register is referenced inside of EXP. */
16061 static int
16062 ix86_check_avx256_register (rtx *pexp, void *data ATTRIBUTE_UNUSED)
16064 rtx exp = *pexp;
16066 if (GET_CODE (exp) == SUBREG)
16067 exp = SUBREG_REG (exp);
16069 if (REG_P (exp)
16070 && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)))
16071 return 1;
16073 return 0;
16076 /* Return needed mode for entity in optimize_mode_switching pass. */
16078 static int
16079 ix86_avx_u128_mode_needed (rtx insn)
16081 if (CALL_P (insn))
16083 rtx link;
16085 /* Needed mode is set to AVX_U128_CLEAN if there are
16086 no 256bit modes used in function arguments. */
16087 for (link = CALL_INSN_FUNCTION_USAGE (insn);
16088 link;
16089 link = XEXP (link, 1))
16091 if (GET_CODE (XEXP (link, 0)) == USE)
16093 rtx arg = XEXP (XEXP (link, 0), 0);
16095 if (ix86_check_avx256_register (&arg, NULL))
16096 return AVX_U128_DIRTY;
16100 return AVX_U128_CLEAN;
16103 /* Require DIRTY mode if a 256bit AVX register is referenced. Hardware
16104 changes state only when a 256bit register is written to, but we need
16105 to prevent the compiler from moving the optimal insertion point above
16106 an eventual read from a 256bit register. */
16107 if (for_each_rtx (&PATTERN (insn), ix86_check_avx256_register, NULL))
16108 return AVX_U128_DIRTY;
16110 return AVX_U128_ANY;
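/* In other words: a call whose argument registers carry no 256bit values
   wants the upper halves CLEAN (so mode switching can place a vzeroupper
   before it), any insn referencing a 256bit register wants DIRTY, and
   everything else is indifferent (AVX_U128_ANY).  */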
16113 /* Return mode that i387 must be switched into
16114 prior to the execution of insn. */
16116 static int
16117 ix86_i387_mode_needed (int entity, rtx insn)
16119 enum attr_i387_cw mode;
16121 /* The mode UNINITIALIZED is used to store the control word after a
16122 function call or ASM pattern. The mode ANY specifies that the function
16123 has no requirements on the control word and makes no changes in the
16124 bits we are interested in. */
16126 if (CALL_P (insn)
16127 || (NONJUMP_INSN_P (insn)
16128 && (asm_noperands (PATTERN (insn)) >= 0
16129 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
16130 return I387_CW_UNINITIALIZED;
16132 if (recog_memoized (insn) < 0)
16133 return I387_CW_ANY;
16135 mode = get_attr_i387_cw (insn);
16137 switch (entity)
16139 case I387_TRUNC:
16140 if (mode == I387_CW_TRUNC)
16141 return mode;
16142 break;
16144 case I387_FLOOR:
16145 if (mode == I387_CW_FLOOR)
16146 return mode;
16147 break;
16149 case I387_CEIL:
16150 if (mode == I387_CW_CEIL)
16151 return mode;
16152 break;
16154 case I387_MASK_PM:
16155 if (mode == I387_CW_MASK_PM)
16156 return mode;
16157 break;
16159 default:
16160 gcc_unreachable ();
16163 return I387_CW_ANY;
16166 /* Return mode that entity must be switched into
16167 prior to the execution of insn. */
16169 static int
16170 ix86_mode_needed (int entity, rtx insn)
16172 switch (entity)
16174 case AVX_U128:
16175 return ix86_avx_u128_mode_needed (insn);
16176 case I387_TRUNC:
16177 case I387_FLOOR:
16178 case I387_CEIL:
16179 case I387_MASK_PM:
16180 return ix86_i387_mode_needed (entity, insn);
16181 default:
16182 gcc_unreachable ();
16184 return 0;
16187 /* Check if a 256bit AVX register is referenced in stores. */
16189 static void
16190 ix86_check_avx256_stores (rtx dest, const_rtx set ATTRIBUTE_UNUSED, void *data)
16192 if (ix86_check_avx256_register (&dest, NULL))
16194 bool *used = (bool *) data;
16195 *used = true;
16199 /* Calculate mode of upper 128bit AVX registers after the insn. */
16201 static int
16202 ix86_avx_u128_mode_after (int mode, rtx insn)
16204 rtx pat = PATTERN (insn);
16206 if (vzeroupper_operation (pat, VOIDmode)
16207 || vzeroall_operation (pat, VOIDmode))
16208 return AVX_U128_CLEAN;
16210 /* We know that the state is clean after a CALL insn if no 256bit
16211 register is used in the function return register. */
16212 if (CALL_P (insn))
16214 bool avx_reg256_found = false;
16215 note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found);
16217 return avx_reg256_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
16220 /* Otherwise, return current mode. Remember that if insn
16221 references AVX 256bit registers, the mode was already changed
16222 to DIRTY from MODE_NEEDED. */
16223 return mode;
16226 /* Return the mode that an insn results in. */
16229 ix86_mode_after (int entity, int mode, rtx insn)
16231 switch (entity)
16233 case AVX_U128:
16234 return ix86_avx_u128_mode_after (mode, insn);
16235 case I387_TRUNC:
16236 case I387_FLOOR:
16237 case I387_CEIL:
16238 case I387_MASK_PM:
16239 return mode;
16240 default:
16241 gcc_unreachable ();
16245 static int
16246 ix86_avx_u128_mode_entry (void)
16248 tree arg;
16250 /* Entry mode is set to AVX_U128_DIRTY if there are
16251 256bit modes used in function arguments. */
16252 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
16253 arg = TREE_CHAIN (arg))
16255 rtx incoming = DECL_INCOMING_RTL (arg);
16257 if (incoming && ix86_check_avx256_register (&incoming, NULL))
16258 return AVX_U128_DIRTY;
16261 return AVX_U128_CLEAN;
16264 /* Return a mode that ENTITY is assumed to be
16265 switched to at function entry. */
16267 static int
16268 ix86_mode_entry (int entity)
16270 switch (entity)
16272 case AVX_U128:
16273 return ix86_avx_u128_mode_entry ();
16274 case I387_TRUNC:
16275 case I387_FLOOR:
16276 case I387_CEIL:
16277 case I387_MASK_PM:
16278 return I387_CW_ANY;
16279 default:
16280 gcc_unreachable ();
16284 static int
16285 ix86_avx_u128_mode_exit (void)
16287 rtx reg = crtl->return_rtx;
16289 /* Exit mode is set to AVX_U128_DIRTY if there are
16290 256bit modes used in the function return register. */
16291 if (reg && ix86_check_avx256_register (&reg, NULL))
16292 return AVX_U128_DIRTY;
16294 return AVX_U128_CLEAN;
16297 /* Return a mode that ENTITY is assumed to be
16298 switched to at function exit. */
16300 static int
16301 ix86_mode_exit (int entity)
16303 switch (entity)
16305 case AVX_U128:
16306 return ix86_avx_u128_mode_exit ();
16307 case I387_TRUNC:
16308 case I387_FLOOR:
16309 case I387_CEIL:
16310 case I387_MASK_PM:
16311 return I387_CW_ANY;
16312 default:
16313 gcc_unreachable ();
16317 static int
16318 ix86_mode_priority (int entity ATTRIBUTE_UNUSED, int n)
16320 return n;
16323 /* Output code to initialize control word copies used by trunc?f?i and
16324 rounding patterns. The current control word is saved first, then
16325 modified according to MODE and stored in a new stack slot. */
16327 static void
16328 emit_i387_cw_initialization (int mode)
16330 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
16331 rtx new_mode;
16333 enum ix86_stack_slot slot;
16335 rtx reg = gen_reg_rtx (HImode);
16337 emit_insn (gen_x86_fnstcw_1 (stored_mode));
16338 emit_move_insn (reg, copy_rtx (stored_mode));
16340 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
16341 || optimize_insn_for_size_p ())
16343 switch (mode)
16345 case I387_CW_TRUNC:
16346 /* round toward zero (truncate) */
16347 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
16348 slot = SLOT_CW_TRUNC;
16349 break;
16351 case I387_CW_FLOOR:
16352 /* round down toward -oo */
16353 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16354 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
16355 slot = SLOT_CW_FLOOR;
16356 break;
16358 case I387_CW_CEIL:
16359 /* round up toward +oo */
16360 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16361 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
16362 slot = SLOT_CW_CEIL;
16363 break;
16365 case I387_CW_MASK_PM:
16366 /* mask precision exception for nearbyint() */
16367 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16368 slot = SLOT_CW_MASK_PM;
16369 break;
16371 default:
16372 gcc_unreachable ();
16375 else
16377 switch (mode)
16379 case I387_CW_TRUNC:
16380 /* round toward zero (truncate) */
16381 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
16382 slot = SLOT_CW_TRUNC;
16383 break;
16385 case I387_CW_FLOOR:
16386 /* round down toward -oo */
16387 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
16388 slot = SLOT_CW_FLOOR;
16389 break;
16391 case I387_CW_CEIL:
16392 /* round up toward +oo */
16393 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
16394 slot = SLOT_CW_CEIL;
16395 break;
16397 case I387_CW_MASK_PM:
16398 /* mask precision exception for nearbyint() */
16399 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16400 slot = SLOT_CW_MASK_PM;
16401 break;
16403 default:
16404 gcc_unreachable ();
16408 gcc_assert (slot < MAX_386_STACK_LOCALS);
16410 new_mode = assign_386_stack_local (HImode, slot);
16411 emit_move_insn (new_mode, reg);
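/* The constants used above target the rounding-control field, bits 10-11
   of the x87 control word: 0x0c00 (both bits set) selects truncation,
   0x0400 round-down, 0x0800 round-up, and 0x0020 sets the precision
   exception mask bit used for nearbyint.  */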
16414 /* Emit vzeroupper. */
16416 void
16417 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
16419 int i;
16421 /* Cancel automatic vzeroupper insertion if there are
16422 live call-saved SSE registers at the insertion point. */
16424 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
16425 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16426 return;
16428 if (TARGET_64BIT)
16429 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
16430 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16431 return;
16433 emit_insn (gen_avx_vzeroupper ());
16438 /* Generate one or more insns to set ENTITY to MODE. REGS_LIVE
16439 is the set of hard registers live at the point where the insn(s)
16440 are to be inserted. */
16442 static void
16443 ix86_emit_mode_set (int entity, int mode, HARD_REG_SET regs_live)
16445 switch (entity)
16447 case AVX_U128:
16448 if (mode == AVX_U128_CLEAN)
16449 ix86_avx_emit_vzeroupper (regs_live);
16450 break;
16451 case I387_TRUNC:
16452 case I387_FLOOR:
16453 case I387_CEIL:
16454 case I387_MASK_PM:
16455 if (mode != I387_CW_ANY
16456 && mode != I387_CW_UNINITIALIZED)
16457 emit_i387_cw_initialization (mode);
16458 break;
16459 default:
16460 gcc_unreachable ();
16464 /* Output code for INSN to convert a float to a signed int. OPERANDS
16465 are the insn operands. The output may be [HSD]Imode and the input
16466 operand may be [SDX]Fmode. */
16468 const char *
16469 output_fix_trunc (rtx insn, rtx *operands, bool fisttp)
16471 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
16472 int dimode_p = GET_MODE (operands[0]) == DImode;
16473 int round_mode = get_attr_i387_cw (insn);
16475 /* Jump through a hoop or two for DImode, since the hardware has no
16476 non-popping instruction. We used to do this a different way, but
16477 that was somewhat fragile and broke with post-reload splitters. */
16478 if ((dimode_p || fisttp) && !stack_top_dies)
16479 output_asm_insn ("fld\t%y1", operands);
16481 gcc_assert (STACK_TOP_P (operands[1]));
16482 gcc_assert (MEM_P (operands[0]));
16483 gcc_assert (GET_MODE (operands[1]) != TFmode);
16485 if (fisttp)
16486 output_asm_insn ("fisttp%Z0\t%0", operands);
16487 else
16489 if (round_mode != I387_CW_ANY)
16490 output_asm_insn ("fldcw\t%3", operands);
16491 if (stack_top_dies || dimode_p)
16492 output_asm_insn ("fistp%Z0\t%0", operands);
16493 else
16494 output_asm_insn ("fist%Z0\t%0", operands);
16495 if (round_mode != I387_CW_ANY)
16496 output_asm_insn ("fldcw\t%2", operands);
16499 return "";
16502 /* Output code for x87 ffreep insn. The OPNO argument, which may only
16503 have the values zero or one, indicates the ffreep insn's operand
16504 from the OPERANDS array. */
16506 static const char *
16507 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
16509 if (TARGET_USE_FFREEP)
16510 #ifdef HAVE_AS_IX86_FFREEP
16511 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
16512 #else
16514 static char retval[32];
16515 int regno = REGNO (operands[opno]);
16517 gcc_assert (STACK_REGNO_P (regno));
16519 regno -= FIRST_STACK_REG;
16521 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
16522 return retval;
16524 #endif
16526 return opno ? "fstp\t%y1" : "fstp\t%y0";
16530 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
16531 should be used. UNORDERED_P is true when fucom should be used. */
16533 const char *
16534 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
16536 int stack_top_dies;
16537 rtx cmp_op0, cmp_op1;
16538 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
16540 if (eflags_p)
16542 cmp_op0 = operands[0];
16543 cmp_op1 = operands[1];
16545 else
16547 cmp_op0 = operands[1];
16548 cmp_op1 = operands[2];
16551 if (is_sse)
16553 if (GET_MODE (operands[0]) == SFmode)
16554 if (unordered_p)
16555 return "%vucomiss\t{%1, %0|%0, %1}";
16556 else
16557 return "%vcomiss\t{%1, %0|%0, %1}";
16558 else
16559 if (unordered_p)
16560 return "%vucomisd\t{%1, %0|%0, %1}";
16561 else
16562 return "%vcomisd\t{%1, %0|%0, %1}";
16565 gcc_assert (STACK_TOP_P (cmp_op0));
16567 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
16569 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
16571 if (stack_top_dies)
16573 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
16574 return output_387_ffreep (operands, 1);
16576 else
16577 return "ftst\n\tfnstsw\t%0";
16580 if (STACK_REG_P (cmp_op1)
16581 && stack_top_dies
16582 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
16583 && REGNO (cmp_op1) != FIRST_STACK_REG)
16585 /* If the top of the 387 stack dies, and the other operand is also
16586 a stack register that dies, then this must be a `fcompp' float
16587 compare. */
16589 if (eflags_p)
16591 /* There is no double popping fcomi variant. Fortunately,
16592 eflags is immune from the fstp's cc clobbering. */
16593 if (unordered_p)
16594 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
16595 else
16596 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
16597 return output_387_ffreep (operands, 0);
16599 else
16601 if (unordered_p)
16602 return "fucompp\n\tfnstsw\t%0";
16603 else
16604 return "fcompp\n\tfnstsw\t%0";
16607 else
16609 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
16611 static const char * const alt[16] =
16613 "fcom%Z2\t%y2\n\tfnstsw\t%0",
16614 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
16615 "fucom%Z2\t%y2\n\tfnstsw\t%0",
16616 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
16618 "ficom%Z2\t%y2\n\tfnstsw\t%0",
16619 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
16620 NULL,
16621 NULL,
16623 "fcomi\t{%y1, %0|%0, %y1}",
16624 "fcomip\t{%y1, %0|%0, %y1}",
16625 "fucomi\t{%y1, %0|%0, %y1}",
16626 "fucomip\t{%y1, %0|%0, %y1}",
16628 NULL,
16629 NULL,
16630 NULL,
16631 NULL
16634 int mask;
16635 const char *ret;
16637 mask = eflags_p << 3;
16638 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
16639 mask |= unordered_p << 1;
16640 mask |= stack_top_dies;
16642 gcc_assert (mask < 16);
16643 ret = alt[mask];
16644 gcc_assert (ret);
16646 return ret;
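/* Worked example of the encoding above (a sketch, not normative): with
   eflags_p = 1, a non-integer second operand, unordered_p = 1 and a dying
   stack top,

       mask = (1 << 3) | (0 << 2) | (1 << 1) | 1 = 11,

   and alt[11] is the "fucomip\t{%y1, %0|%0, %y1}" form.  */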
16650 void
16651 ix86_output_addr_vec_elt (FILE *file, int value)
16653 const char *directive = ASM_LONG;
16655 #ifdef ASM_QUAD
16656 if (TARGET_LP64)
16657 directive = ASM_QUAD;
16658 #else
16659 gcc_assert (!TARGET_64BIT);
16660 #endif
16662 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
16665 void
16666 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
16668 const char *directive = ASM_LONG;
16670 #ifdef ASM_QUAD
16671 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
16672 directive = ASM_QUAD;
16673 #else
16674 gcc_assert (!TARGET_64BIT);
16675 #endif
16676 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
16677 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
16678 fprintf (file, "%s%s%d-%s%d\n",
16679 directive, LPREFIX, value, LPREFIX, rel);
16680 else if (HAVE_AS_GOTOFF_IN_DATA)
16681 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
16682 #if TARGET_MACHO
16683 else if (TARGET_MACHO)
16685 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
16686 machopic_output_function_base_name (file);
16687 putc ('\n', file);
16689 #endif
16690 else
16691 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
16692 GOT_SYMBOL_NAME, LPREFIX, value);
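/* For illustration only (assuming LPREFIX is ".L" as on typical ELF
   targets): the 64-bit/VxWorks path prints a label difference such as
   ".long .L5-.L3" (or ".quad" when CASE_VECTOR_MODE is DImode), while the
   @GOTOFF path prints ".long .L5@GOTOFF".  */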
16695 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
16696 for the target. */
16698 void
16699 ix86_expand_clear (rtx dest)
16701 rtx tmp;
16703 /* We play register width games, which are only valid after reload. */
16704 gcc_assert (reload_completed);
16706 /* Avoid HImode and its attendant prefix byte. */
16707 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
16708 dest = gen_rtx_REG (SImode, REGNO (dest));
16709 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
16711 if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
16713 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16714 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
16717 emit_insn (tmp);
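/* A rough sketch of the effect (register choice illustrative): clearing a
   32-bit or narrower register emits

       (parallel [(set (reg:SI ax) (const_int 0))
                  (clobber (reg:CC flags))])

   which the move patterns typically output as "xorl %eax, %eax"; with
   TARGET_USE_MOV0 and not optimizing for size, a plain SET is emitted
   instead and comes out as "movl $0, %eax".  */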
16720 /* X is an unchanging MEM. If it is a constant pool reference, return
16721 the constant pool rtx, else NULL. */
rtx
16724 maybe_get_pool_constant (rtx x)
16726 x = ix86_delegitimize_address (XEXP (x, 0));
16728 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
16729 return get_pool_constant (x);
16731 return NULL_RTX;
16734 void
16735 ix86_expand_move (enum machine_mode mode, rtx operands[])
16737 rtx op0, op1;
16738 enum tls_model model;
16740 op0 = operands[0];
16741 op1 = operands[1];
16743 if (GET_CODE (op1) == SYMBOL_REF)
16745 rtx tmp;
16747 model = SYMBOL_REF_TLS_MODEL (op1);
16748 if (model)
16750 op1 = legitimize_tls_address (op1, model, true);
16751 op1 = force_operand (op1, op0);
16752 if (op1 == op0)
16753 return;
16754 op1 = convert_to_mode (mode, op1, 1);
16756 else if ((tmp = legitimize_pe_coff_symbol (op1, false)) != NULL_RTX)
16757 op1 = tmp;
16759 else if (GET_CODE (op1) == CONST
16760 && GET_CODE (XEXP (op1, 0)) == PLUS
16761 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
16763 rtx addend = XEXP (XEXP (op1, 0), 1);
16764 rtx symbol = XEXP (XEXP (op1, 0), 0);
16765 rtx tmp;
16767 model = SYMBOL_REF_TLS_MODEL (symbol);
16768 if (model)
16769 tmp = legitimize_tls_address (symbol, model, true);
16770 else
16771 tmp = legitimize_pe_coff_symbol (symbol, true);
16773 if (tmp)
16775 tmp = force_operand (tmp, NULL);
16776 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
16777 op0, 1, OPTAB_DIRECT);
16778 if (tmp == op0)
16779 return;
16780 op1 = convert_to_mode (mode, tmp, 1);
16784 if ((flag_pic || MACHOPIC_INDIRECT)
16785 && symbolic_operand (op1, mode))
16787 if (TARGET_MACHO && !TARGET_64BIT)
16789 #if TARGET_MACHO
16790 /* dynamic-no-pic */
16791 if (MACHOPIC_INDIRECT)
16793 rtx temp = ((reload_in_progress
16794 || ((op0 && REG_P (op0))
16795 && mode == Pmode))
16796 ? op0 : gen_reg_rtx (Pmode));
16797 op1 = machopic_indirect_data_reference (op1, temp);
16798 if (MACHOPIC_PURE)
16799 op1 = machopic_legitimize_pic_address (op1, mode,
16800 temp == op1 ? 0 : temp);
16802 if (op0 != op1 && GET_CODE (op0) != MEM)
16804 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
16805 emit_insn (insn);
16806 return;
16808 if (GET_CODE (op0) == MEM)
16809 op1 = force_reg (Pmode, op1);
16810 else
16812 rtx temp = op0;
16813 if (GET_CODE (temp) != REG)
16814 temp = gen_reg_rtx (Pmode);
16815 temp = legitimize_pic_address (op1, temp);
16816 if (temp == op0)
16817 return;
16818 op1 = temp;
16820 /* dynamic-no-pic */
16821 #endif
16823 else
16825 if (MEM_P (op0))
16826 op1 = force_reg (mode, op1);
16827 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
16829 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
16830 op1 = legitimize_pic_address (op1, reg);
16831 if (op0 == op1)
16832 return;
16833 op1 = convert_to_mode (mode, op1, 1);
16837 else
16839 if (MEM_P (op0)
16840 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
16841 || !push_operand (op0, mode))
16842 && MEM_P (op1))
16843 op1 = force_reg (mode, op1);
16845 if (push_operand (op0, mode)
16846 && ! general_no_elim_operand (op1, mode))
16847 op1 = copy_to_mode_reg (mode, op1);
16849 /* Force large constants in 64-bit compilation into a register
16850 to get them CSEed. */
16851 if (can_create_pseudo_p ()
16852 && (mode == DImode) && TARGET_64BIT
16853 && immediate_operand (op1, mode)
16854 && !x86_64_zext_immediate_operand (op1, VOIDmode)
16855 && !register_operand (op0, mode)
16856 && optimize)
16857 op1 = copy_to_mode_reg (mode, op1);
16859 if (can_create_pseudo_p ()
16860 && FLOAT_MODE_P (mode)
16861 && GET_CODE (op1) == CONST_DOUBLE)
16863 /* If we are loading a floating point constant to a register,
16864 force the value to memory now, since we'll get better code
16865 out the back end. */
16867 op1 = validize_mem (force_const_mem (mode, op1));
16868 if (!register_operand (op0, mode))
16870 rtx temp = gen_reg_rtx (mode);
16871 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
16872 emit_move_insn (op0, temp);
16873 return;
16878 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
16881 void
16882 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
16884 rtx op0 = operands[0], op1 = operands[1];
16885 unsigned int align = GET_MODE_ALIGNMENT (mode);
16887 if (push_operand (op0, VOIDmode))
16888 op0 = emit_move_resolve_push (mode, op0);
16890 /* Force constants other than zero into memory. We do not know how
16891 the instructions used to build constants modify the upper 64 bits
16892 of the register; once we have that information we may be able
16893 to handle some of them more efficiently. */
16894 if (can_create_pseudo_p ()
16895 && register_operand (op0, mode)
16896 && (CONSTANT_P (op1)
16897 || (GET_CODE (op1) == SUBREG
16898 && CONSTANT_P (SUBREG_REG (op1))))
16899 && !standard_sse_constant_p (op1))
16900 op1 = validize_mem (force_const_mem (mode, op1));
16902 /* We need to check memory alignment for SSE mode since an attribute
16903 can make operands unaligned. */
16904 if (can_create_pseudo_p ()
16905 && SSE_REG_MODE_P (mode)
16906 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
16907 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
16909 rtx tmp[2];
16911 /* ix86_expand_vector_move_misalign() does not like constants ... */
16912 if (CONSTANT_P (op1)
16913 || (GET_CODE (op1) == SUBREG
16914 && CONSTANT_P (SUBREG_REG (op1))))
16915 op1 = validize_mem (force_const_mem (mode, op1));
16917 /* ... nor both arguments in memory. */
16918 if (!register_operand (op0, mode)
16919 && !register_operand (op1, mode))
16920 op1 = force_reg (mode, op1);
16922 tmp[0] = op0; tmp[1] = op1;
16923 ix86_expand_vector_move_misalign (mode, tmp);
16924 return;
16927 /* Make operand1 a register if it isn't already. */
16928 if (can_create_pseudo_p ()
16929 && !register_operand (op0, mode)
16930 && !register_operand (op1, mode))
16932 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
16933 return;
16936 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
16939 /* Split 32-byte AVX unaligned load and store if needed. */
16941 static void
16942 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
16944 rtx m;
16945 rtx (*extract) (rtx, rtx, rtx);
16946 rtx (*load_unaligned) (rtx, rtx);
16947 rtx (*store_unaligned) (rtx, rtx);
16948 enum machine_mode mode;
16950 switch (GET_MODE (op0))
16952 default:
16953 gcc_unreachable ();
16954 case V32QImode:
16955 extract = gen_avx_vextractf128v32qi;
16956 load_unaligned = gen_avx_loaddquv32qi;
16957 store_unaligned = gen_avx_storedquv32qi;
16958 mode = V16QImode;
16959 break;
16960 case V8SFmode:
16961 extract = gen_avx_vextractf128v8sf;
16962 load_unaligned = gen_avx_loadups256;
16963 store_unaligned = gen_avx_storeups256;
16964 mode = V4SFmode;
16965 break;
16966 case V4DFmode:
16967 extract = gen_avx_vextractf128v4df;
16968 load_unaligned = gen_avx_loadupd256;
16969 store_unaligned = gen_avx_storeupd256;
16970 mode = V2DFmode;
16971 break;
16974 if (MEM_P (op1))
16976 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
16978 rtx r = gen_reg_rtx (mode);
16979 m = adjust_address (op1, mode, 0);
16980 emit_move_insn (r, m);
16981 m = adjust_address (op1, mode, 16);
16982 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
16983 emit_move_insn (op0, r);
16985 /* Normal *mov<mode>_internal pattern will handle
16986 unaligned loads just fine if misaligned_operand
16987 is true, and without the UNSPEC it can be combined
16988 with arithmetic instructions. */
16989 else if (misaligned_operand (op1, GET_MODE (op1)))
16990 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
16991 else
16992 emit_insn (load_unaligned (op0, op1));
16994 else if (MEM_P (op0))
16996 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE)
16998 m = adjust_address (op0, mode, 0);
16999 emit_insn (extract (m, op1, const0_rtx));
17000 m = adjust_address (op0, mode, 16);
17001 emit_insn (extract (m, op1, const1_rtx));
17003 else
17004 emit_insn (store_unaligned (op0, op1));
17006 else
17007 gcc_unreachable ();
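/* Illustrative sketch of the split-load case (register names and the
   exact insns depend on the mode and are only for illustration): with
   TARGET_AVX256_SPLIT_UNALIGNED_LOAD, a misaligned 32-byte V8SF load is
   emitted roughly as

       vmovups     (%rax), %xmm0
       vinsertf128 $1, 16(%rax), %ymm0, %ymm0

   i.e. a 16-byte load of the low half followed by a VEC_CONCAT with the
   high half, rather than a single 32-byte vmovups.  */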
17010 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
17011 straight to ix86_expand_vector_move. */
17012 /* Code generation for scalar reg-reg moves of single and double precision data:
17013 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
17014 movaps reg, reg
17015 else
17016 movss reg, reg
17017 if (x86_sse_partial_reg_dependency == true)
17018 movapd reg, reg
17019 else
17020 movsd reg, reg
17022 Code generation for scalar loads of double precision data:
17023 if (x86_sse_split_regs == true)
17024 movlpd mem, reg (gas syntax)
17025 else
17026 movsd mem, reg
17028 Code generation for unaligned packed loads of single precision data
17029 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
17030 if (x86_sse_unaligned_move_optimal)
17031 movups mem, reg
17033 if (x86_sse_partial_reg_dependency == true)
17035 xorps reg, reg
17036 movlps mem, reg
17037 movhps mem+8, reg
17039 else
17041 movlps mem, reg
17042 movhps mem+8, reg
17045 Code generation for unaligned packed loads of double precision data
17046 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
17047 if (x86_sse_unaligned_move_optimal)
17048 movupd mem, reg
17050 if (x86_sse_split_regs == true)
17052 movlpd mem, reg
17053 movhpd mem+8, reg
17055 else
17057 movsd mem, reg
17058 movhpd mem+8, reg
17062 void
17063 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
17065 rtx op0, op1, orig_op0 = NULL_RTX, m;
17066 rtx (*load_unaligned) (rtx, rtx);
17067 rtx (*store_unaligned) (rtx, rtx);
17069 op0 = operands[0];
17070 op1 = operands[1];
17072 if (GET_MODE_SIZE (mode) == 64)
17074 switch (GET_MODE_CLASS (mode))
17076 case MODE_VECTOR_INT:
17077 case MODE_INT:
17078 if (GET_MODE (op0) != V16SImode)
17080 if (!MEM_P (op0))
17082 orig_op0 = op0;
17083 op0 = gen_reg_rtx (V16SImode);
17085 else
17086 op0 = gen_lowpart (V16SImode, op0);
17088 op1 = gen_lowpart (V16SImode, op1);
17089 /* FALLTHRU */
17091 case MODE_VECTOR_FLOAT:
17092 switch (GET_MODE (op0))
17094 default:
17095 gcc_unreachable ();
17096 case V16SImode:
17097 load_unaligned = gen_avx512f_loaddquv16si;
17098 store_unaligned = gen_avx512f_storedquv16si;
17099 break;
17100 case V16SFmode:
17101 load_unaligned = gen_avx512f_loadups512;
17102 store_unaligned = gen_avx512f_storeups512;
17103 break;
17104 case V8DFmode:
17105 load_unaligned = gen_avx512f_loadupd512;
17106 store_unaligned = gen_avx512f_storeupd512;
17107 break;
17110 if (MEM_P (op1))
17111 emit_insn (load_unaligned (op0, op1));
17112 else if (MEM_P (op0))
17113 emit_insn (store_unaligned (op0, op1));
17114 else
17115 gcc_unreachable ();
17116 if (orig_op0)
17117 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17118 break;
17120 default:
17121 gcc_unreachable ();
17124 return;
17127 if (TARGET_AVX
17128 && GET_MODE_SIZE (mode) == 32)
17130 switch (GET_MODE_CLASS (mode))
17132 case MODE_VECTOR_INT:
17133 case MODE_INT:
17134 if (GET_MODE (op0) != V32QImode)
17136 if (!MEM_P (op0))
17138 orig_op0 = op0;
17139 op0 = gen_reg_rtx (V32QImode);
17141 else
17142 op0 = gen_lowpart (V32QImode, op0);
17144 op1 = gen_lowpart (V32QImode, op1);
17145 /* FALLTHRU */
17147 case MODE_VECTOR_FLOAT:
17148 ix86_avx256_split_vector_move_misalign (op0, op1);
17149 if (orig_op0)
17150 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17151 break;
17153 default:
17154 gcc_unreachable ();
17157 return;
17160 if (MEM_P (op1))
17162 /* Normal *mov<mode>_internal pattern will handle
17163 unaligned loads just fine if misaligned_operand
17164 is true, and without the UNSPEC it can be combined
17165 with arithmetic instructions. */
17166 if (TARGET_AVX
17167 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
17168 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
17169 && misaligned_operand (op1, GET_MODE (op1)))
17170 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17171 /* ??? If we have typed data, then it would appear that using
17172 movdqu is the only way to get unaligned data loaded with
17173 integer type. */
17174 else if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17176 if (GET_MODE (op0) != V16QImode)
17178 orig_op0 = op0;
17179 op0 = gen_reg_rtx (V16QImode);
17181 op1 = gen_lowpart (V16QImode, op1);
17182 /* We will eventually emit movups based on insn attributes. */
17183 emit_insn (gen_sse2_loaddquv16qi (op0, op1));
17184 if (orig_op0)
17185 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17187 else if (TARGET_SSE2 && mode == V2DFmode)
17189 rtx zero;
17191 if (TARGET_AVX
17192 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17193 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17194 || optimize_insn_for_size_p ())
17196 /* We will eventually emit movups based on insn attributes. */
17197 emit_insn (gen_sse2_loadupd (op0, op1));
17198 return;
17201 /* When SSE registers are split into halves, we can avoid
17202 writing to the top half twice. */
17203 if (TARGET_SSE_SPLIT_REGS)
17205 emit_clobber (op0);
17206 zero = op0;
17208 else
17210 /* ??? Not sure about the best option for the Intel chips.
17211 The following would seem to satisfy; the register is
17212 entirely cleared, breaking the dependency chain. We
17213 then store to the upper half, with a dependency depth
17214 of one. A rumor has it that Intel recommends two movsd
17215 followed by an unpacklpd, but this is unconfirmed. And
17216 given that the dependency depth of the unpacklpd would
17217 still be one, I'm not sure why this would be better. */
17218 zero = CONST0_RTX (V2DFmode);
17221 m = adjust_address (op1, DFmode, 0);
17222 emit_insn (gen_sse2_loadlpd (op0, zero, m));
17223 m = adjust_address (op1, DFmode, 8);
17224 emit_insn (gen_sse2_loadhpd (op0, op0, m));
17226 else
17228 rtx t;
17230 if (TARGET_AVX
17231 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17232 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17233 || optimize_insn_for_size_p ())
17235 if (GET_MODE (op0) != V4SFmode)
17237 orig_op0 = op0;
17238 op0 = gen_reg_rtx (V4SFmode);
17240 op1 = gen_lowpart (V4SFmode, op1);
17241 emit_insn (gen_sse_loadups (op0, op1));
17242 if (orig_op0)
17243 emit_move_insn (orig_op0,
17244 gen_lowpart (GET_MODE (orig_op0), op0));
17245 return;
17248 if (mode != V4SFmode)
17249 t = gen_reg_rtx (V4SFmode);
17250 else
17251 t = op0;
17253 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
17254 emit_move_insn (t, CONST0_RTX (V4SFmode));
17255 else
17256 emit_clobber (t);
17258 m = adjust_address (op1, V2SFmode, 0);
17259 emit_insn (gen_sse_loadlps (t, t, m));
17260 m = adjust_address (op1, V2SFmode, 8);
17261 emit_insn (gen_sse_loadhps (t, t, m));
17262 if (mode != V4SFmode)
17263 emit_move_insn (op0, gen_lowpart (mode, t));
17266 else if (MEM_P (op0))
17268 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17270 op0 = gen_lowpart (V16QImode, op0);
17271 op1 = gen_lowpart (V16QImode, op1);
17272 /* We will eventually emit movups based on insn attributes. */
17273 emit_insn (gen_sse2_storedquv16qi (op0, op1));
17275 else if (TARGET_SSE2 && mode == V2DFmode)
17277 if (TARGET_AVX
17278 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17279 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17280 || optimize_insn_for_size_p ())
17281 /* We will eventually emit movups based on insn attributes. */
17282 emit_insn (gen_sse2_storeupd (op0, op1));
17283 else
17285 m = adjust_address (op0, DFmode, 0);
17286 emit_insn (gen_sse2_storelpd (m, op1));
17287 m = adjust_address (op0, DFmode, 8);
17288 emit_insn (gen_sse2_storehpd (m, op1));
17291 else
17293 if (mode != V4SFmode)
17294 op1 = gen_lowpart (V4SFmode, op1);
17296 if (TARGET_AVX
17297 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17298 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17299 || optimize_insn_for_size_p ())
17301 op0 = gen_lowpart (V4SFmode, op0);
17302 emit_insn (gen_sse_storeups (op0, op1));
17304 else
17306 m = adjust_address (op0, V2SFmode, 0);
17307 emit_insn (gen_sse_storelps (m, op1));
17308 m = adjust_address (op0, V2SFmode, 8);
17309 emit_insn (gen_sse_storehps (m, op1));
17313 else
17314 gcc_unreachable ();
17317 /* Helper function of ix86_fixup_binary_operands to canonicalize
17318 operand order. Returns true if the operands should be swapped. */
17320 static bool
17321 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
17322 rtx operands[])
17324 rtx dst = operands[0];
17325 rtx src1 = operands[1];
17326 rtx src2 = operands[2];
17328 /* If the operation is not commutative, we can't do anything. */
17329 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
17330 return false;
17332 /* Highest priority is that src1 should match dst. */
17333 if (rtx_equal_p (dst, src1))
17334 return false;
17335 if (rtx_equal_p (dst, src2))
17336 return true;
17338 /* Next highest priority is that immediate constants come second. */
17339 if (immediate_operand (src2, mode))
17340 return false;
17341 if (immediate_operand (src1, mode))
17342 return true;
17344 /* Lowest priority is that memory references should come second. */
17345 if (MEM_P (src2))
17346 return false;
17347 if (MEM_P (src1))
17348 return true;
17350 return false;
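/* A small example of the priorities above (illustrative only): for a
   commutative PLUS with dst = reg1, src1 = mem, src2 = reg2, neither
   source matches dst and neither is an immediate, but src1 is a MEM,
   so the function returns true and the memory operand ends up second.  */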
17354 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
17355 destination to use for the operation. If different from the true
17356 destination in operands[0], a copy operation will be required. */
rtx
17359 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
17360 rtx operands[])
17362 rtx dst = operands[0];
17363 rtx src1 = operands[1];
17364 rtx src2 = operands[2];
17366 /* Canonicalize operand order. */
17367 if (ix86_swap_binary_operands_p (code, mode, operands))
17369 rtx temp;
17371 /* It is invalid to swap operands of different modes. */
17372 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
17374 temp = src1;
17375 src1 = src2;
17376 src2 = temp;
17379 /* Both source operands cannot be in memory. */
17380 if (MEM_P (src1) && MEM_P (src2))
17382 /* Optimization: Only read from memory once. */
17383 if (rtx_equal_p (src1, src2))
17385 src2 = force_reg (mode, src2);
17386 src1 = src2;
17388 else if (rtx_equal_p (dst, src1))
17389 src2 = force_reg (mode, src2);
17390 else
17391 src1 = force_reg (mode, src1);
17394 /* If the destination is memory, and we do not have matching source
17395 operands, do things in registers. */
17396 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17397 dst = gen_reg_rtx (mode);
17399 /* Source 1 cannot be a constant. */
17400 if (CONSTANT_P (src1))
17401 src1 = force_reg (mode, src1);
17403 /* Source 1 cannot be a non-matching memory. */
17404 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17405 src1 = force_reg (mode, src1);
17407 /* Improve address combine. */
17408 if (code == PLUS
17409 && GET_MODE_CLASS (mode) == MODE_INT
17410 && MEM_P (src2))
17411 src2 = force_reg (mode, src2);
17413 operands[1] = src1;
17414 operands[2] = src2;
17415 return dst;
17418 /* Similarly, but assume that the destination has already been
17419 set up properly. */
17421 void
17422 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
17423 enum machine_mode mode, rtx operands[])
17425 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
17426 gcc_assert (dst == operands[0]);
17429 /* Attempt to expand a binary operator. Make the expansion closer to the
17430 actual machine, than just general_operand, which will allow 3 separate
17431 memory references (one output, two input) in a single insn. */
17433 void
17434 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
17435 rtx operands[])
17437 rtx src1, src2, dst, op, clob;
17439 dst = ix86_fixup_binary_operands (code, mode, operands);
17440 src1 = operands[1];
17441 src2 = operands[2];
17443 /* Emit the instruction. */
17445 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
17446 if (reload_in_progress)
17448 /* Reload doesn't know about the flags register, and doesn't know that
17449 it doesn't want to clobber it. We can only do this with PLUS. */
17450 gcc_assert (code == PLUS);
17451 emit_insn (op);
17453 else if (reload_completed
17454 && code == PLUS
17455 && !rtx_equal_p (dst, src1))
17457 /* This is going to be an LEA; avoid splitting it later. */
17458 emit_insn (op);
17460 else
17462 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17463 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
17466 /* Fix up the destination if needed. */
17467 if (dst != operands[0])
17468 emit_move_insn (operands[0], dst);
17471 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
17472 the given OPERANDS. */
17474 void
17475 ix86_expand_vector_logical_operator (enum rtx_code code, enum machine_mode mode,
17476 rtx operands[])
17478 rtx op1 = NULL_RTX, op2 = NULL_RTX;
17479 if (GET_CODE (operands[1]) == SUBREG)
17481 op1 = operands[1];
17482 op2 = operands[2];
17484 else if (GET_CODE (operands[2]) == SUBREG)
17486 op1 = operands[2];
17487 op2 = operands[1];
17489 /* Optimize (__m128i) d | (__m128i) e and similar code
17490 when d and e are float vectors into float vector logical
17491 insn. In C/C++ without using intrinsics there is no other way
17492 to express vector logical operation on float vectors than
17493 to cast them temporarily to integer vectors. */
17494 if (op1
17495 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17496 && ((GET_CODE (op2) == SUBREG || GET_CODE (op2) == CONST_VECTOR))
17497 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
17498 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
17499 && SUBREG_BYTE (op1) == 0
17500 && (GET_CODE (op2) == CONST_VECTOR
17501 || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
17502 && SUBREG_BYTE (op2) == 0))
17503 && can_create_pseudo_p ())
17505 rtx dst;
17506 switch (GET_MODE (SUBREG_REG (op1)))
17508 case V4SFmode:
17509 case V8SFmode:
17510 case V2DFmode:
17511 case V4DFmode:
17512 dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
17513 if (GET_CODE (op2) == CONST_VECTOR)
17515 op2 = gen_lowpart (GET_MODE (dst), op2);
17516 op2 = force_reg (GET_MODE (dst), op2);
17518 else
17520 op1 = operands[1];
17521 op2 = SUBREG_REG (operands[2]);
17522 if (!nonimmediate_operand (op2, GET_MODE (dst)))
17523 op2 = force_reg (GET_MODE (dst), op2);
17525 op1 = SUBREG_REG (op1);
17526 if (!nonimmediate_operand (op1, GET_MODE (dst)))
17527 op1 = force_reg (GET_MODE (dst), op1);
17528 emit_insn (gen_rtx_SET (VOIDmode, dst,
17529 gen_rtx_fmt_ee (code, GET_MODE (dst),
17530 op1, op2)));
17531 emit_move_insn (operands[0], gen_lowpart (mode, dst));
17532 return;
17533 default:
17534 break;
17537 if (!nonimmediate_operand (operands[1], mode))
17538 operands[1] = force_reg (mode, operands[1]);
17539 if (!nonimmediate_operand (operands[2], mode))
17540 operands[2] = force_reg (mode, operands[2]);
17541 ix86_fixup_binary_operands_no_copy (code, mode, operands);
17542 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
17543 gen_rtx_fmt_ee (code, mode, operands[1],
17544 operands[2])));
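/* For illustration, the kind of source this optimization targets (using
   GCC vector extensions; the type and variable names are made up):

       typedef float v4sf __attribute__ ((vector_size (16)));
       typedef int   v4si __attribute__ ((vector_size (16)));

       v4sf a, b;
       v4sf c = (v4sf) ((v4si) a & (v4si) b);

   Without the optimization the AND would go through an integer vector
   insn; with it, the operation is performed in V4SFmode so that a float
   logical insn such as andps can be used.  */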
17547 /* Return TRUE or FALSE depending on whether the binary operator meets the
17548 appropriate constraints. */
17550 bool
17551 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
17552 rtx operands[3])
17554 rtx dst = operands[0];
17555 rtx src1 = operands[1];
17556 rtx src2 = operands[2];
17558 /* Both source operands cannot be in memory. */
17559 if (MEM_P (src1) && MEM_P (src2))
17560 return false;
17562 /* Canonicalize operand order for commutative operators. */
17563 if (ix86_swap_binary_operands_p (code, mode, operands))
17565 rtx temp = src1;
17566 src1 = src2;
17567 src2 = temp;
17570 /* If the destination is memory, we must have a matching source operand. */
17571 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17572 return false;
17574 /* Source 1 cannot be a constant. */
17575 if (CONSTANT_P (src1))
17576 return false;
17578 /* Source 1 cannot be a non-matching memory. */
17579 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17580 /* Support "andhi/andsi/anddi" as a zero-extending move. */
17581 return (code == AND
17582 && (mode == HImode
17583 || mode == SImode
17584 || (TARGET_64BIT && mode == DImode))
17585 && satisfies_constraint_L (src2));
17587 return true;
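/* Illustration of the zero-extending-move exception above: an insn such as

       (set (reg:SI r) (and:SI (mem:SI m) (const_int 0xff)))

   has a non-matching memory source, but is still accepted because it can
   be emitted as a movzbl-style zero-extending load.  */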
17590 /* Attempt to expand a unary operator. Make the expansion closer to the
17591 actual machine, than just general_operand, which will allow 2 separate
17592 memory references (one output, one input) in a single insn. */
17594 void
17595 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
17596 rtx operands[])
17598 int matching_memory;
17599 rtx src, dst, op, clob;
17601 dst = operands[0];
17602 src = operands[1];
17604 /* If the destination is memory, and we do not have matching source
17605 operands, do things in registers. */
17606 matching_memory = 0;
17607 if (MEM_P (dst))
17609 if (rtx_equal_p (dst, src))
17610 matching_memory = 1;
17611 else
17612 dst = gen_reg_rtx (mode);
17615 /* When source operand is memory, destination must match. */
17616 if (MEM_P (src) && !matching_memory)
17617 src = force_reg (mode, src);
17619 /* Emit the instruction. */
17621 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
17622 if (reload_in_progress || code == NOT)
17624 /* Reload doesn't know about the flags register, and doesn't know that
17625 it doesn't want to clobber it. */
17626 gcc_assert (code == NOT);
17627 emit_insn (op);
17629 else
17631 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17632 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
17635 /* Fix up the destination if needed. */
17636 if (dst != operands[0])
17637 emit_move_insn (operands[0], dst);
17640 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
17641 divisor are within the range [0-255]. */
17643 void
17644 ix86_split_idivmod (enum machine_mode mode, rtx operands[],
17645 bool signed_p)
17647 rtx end_label, qimode_label;
17648 rtx insn, div, mod;
17649 rtx scratch, tmp0, tmp1, tmp2;
17650 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
17651 rtx (*gen_zero_extend) (rtx, rtx);
17652 rtx (*gen_test_ccno_1) (rtx, rtx);
17654 switch (mode)
17656 case SImode:
17657 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
17658 gen_test_ccno_1 = gen_testsi_ccno_1;
17659 gen_zero_extend = gen_zero_extendqisi2;
17660 break;
17661 case DImode:
17662 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
17663 gen_test_ccno_1 = gen_testdi_ccno_1;
17664 gen_zero_extend = gen_zero_extendqidi2;
17665 break;
17666 default:
17667 gcc_unreachable ();
17670 end_label = gen_label_rtx ();
17671 qimode_label = gen_label_rtx ();
17673 scratch = gen_reg_rtx (mode);
17675 /* Use 8bit unsigned divmod if dividend and divisor are within
17676 the range [0-255]. */
17677 emit_move_insn (scratch, operands[2]);
17678 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
17679 scratch, 1, OPTAB_DIRECT);
17680 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
17681 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
17682 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
17683 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
17684 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
17685 pc_rtx);
17686 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
17687 predict_jump (REG_BR_PROB_BASE * 50 / 100);
17688 JUMP_LABEL (insn) = qimode_label;
17690 /* Generate original signed/unsigned divmod. */
17691 div = gen_divmod4_1 (operands[0], operands[1],
17692 operands[2], operands[3]);
17693 emit_insn (div);
17695 /* Branch to the end. */
17696 emit_jump_insn (gen_jump (end_label));
17697 emit_barrier ();
17699 /* Generate 8bit unsigned divide. */
17700 emit_label (qimode_label);
17701 /* Don't use operands[0] for result of 8bit divide since not all
17702 registers support QImode ZERO_EXTRACT. */
17703 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
17704 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
17705 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
17706 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
17708 if (signed_p)
17710 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
17711 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
17713 else
17715 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
17716 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
17719 /* Extract remainder from AH. */
17720 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
17721 if (REG_P (operands[1]))
17722 insn = emit_move_insn (operands[1], tmp1);
17723 else
17725 /* Need a new scratch register since the old one has result
17726 of 8bit divide. */
17727 scratch = gen_reg_rtx (mode);
17728 emit_move_insn (scratch, tmp1);
17729 insn = emit_move_insn (operands[1], scratch);
17731 set_unique_reg_note (insn, REG_EQUAL, mod);
17733 /* Zero extend quotient from AL. */
17734 tmp1 = gen_lowpart (QImode, tmp0);
17735 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
17736 set_unique_reg_note (insn, REG_EQUAL, div);
17738 emit_label (end_label);
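/* Rough shape of the split for the unsigned SImode case (register names
   purely illustrative):

       movl    %eax, %ecx        # scratch = dividend
       orl     %ebx, %ecx        # scratch |= divisor
       testl   $-256, %ecx
       je      .Lqi              # no bits above the low 8 set in either
       xorl    %edx, %edx
       divl    %ebx              # ordinary 32-bit divide
       jmp     .Ldone
   .Lqi:
       divb    %bl               # 8-bit divide: AL = quotient, AH = remainder
   .Ldone:                                                                   */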
17741 /* Whether it is OK to emit CFI directives when emitting asm code. */
17743 bool
17744 ix86_emit_cfi ()
17746 return dwarf2out_do_cfi_asm ();
17749 #define LEA_MAX_STALL (3)
17750 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
17752 /* Increase given DISTANCE in half-cycles according to
17753 dependencies between PREV and NEXT instructions.
17754 Add 1 half-cycle if there is no dependency and
17755 go to the next cycle if there is some dependency. */
17757 static unsigned int
17758 increase_distance (rtx prev, rtx next, unsigned int distance)
17760 df_ref *use_rec;
17761 df_ref *def_rec;
17763 if (!prev || !next)
17764 return distance + (distance & 1) + 2;
17766 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
17767 return distance + 1;
17769 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
17770 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
17771 if (!DF_REF_IS_ARTIFICIAL (*def_rec)
17772 && DF_REF_REGNO (*use_rec) == DF_REF_REGNO (*def_rec))
17773 return distance + (distance & 1) + 2;
17775 return distance + 1;
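/* Worked example of the arithmetic above (illustration): with a current
   DISTANCE of 3 half-cycles, a pair of insns with no register dependency
   advances it to 4, while a true dependency rounds it up to the next full
   cycle and adds one more: 3 + (3 & 1) + 2 = 6.  */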
17778 /* Function checks if instruction INSN defines register number
17779 REGNO1 or REGNO2. */
17781 static bool
17782 insn_defines_reg (unsigned int regno1, unsigned int regno2,
17783 rtx insn)
17785 df_ref *def_rec;
17787 for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++)
17788 if (DF_REF_REG_DEF_P (*def_rec)
17789 && !DF_REF_IS_ARTIFICIAL (*def_rec)
17790 && (regno1 == DF_REF_REGNO (*def_rec)
17791 || regno2 == DF_REF_REGNO (*def_rec)))
17793 return true;
17796 return false;
17799 /* Function checks if instruction INSN uses register number
17800 REGNO as a part of address expression. */
17802 static bool
17803 insn_uses_reg_mem (unsigned int regno, rtx insn)
17805 df_ref *use_rec;
17807 for (use_rec = DF_INSN_USES (insn); *use_rec; use_rec++)
17808 if (DF_REF_REG_MEM_P (*use_rec) && regno == DF_REF_REGNO (*use_rec))
17809 return true;
17811 return false;
17814 /* Search backward for non-agu definition of register number REGNO1
17815 or register number REGNO2 in basic block starting from instruction
17816 START up to head of basic block or instruction INSN.
17818 The function stores true in *FOUND if a definition was found
17819 and false otherwise.
17821 Distance in half-cycles between START and found instruction or head
17822 of BB is added to DISTANCE and returned. */
17824 static int
17825 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
17826 rtx insn, int distance,
17827 rtx start, bool *found)
17829 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
17830 rtx prev = start;
17831 rtx next = NULL;
17833 *found = false;
17835 while (prev
17836 && prev != insn
17837 && distance < LEA_SEARCH_THRESHOLD)
17839 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
17841 distance = increase_distance (prev, next, distance);
17842 if (insn_defines_reg (regno1, regno2, prev))
17844 if (recog_memoized (prev) < 0
17845 || get_attr_type (prev) != TYPE_LEA)
17847 *found = true;
17848 return distance;
17852 next = prev;
17854 if (prev == BB_HEAD (bb))
17855 break;
17857 prev = PREV_INSN (prev);
17860 return distance;
17863 /* Search backward for non-agu definition of register number REGNO1
17864 or register number REGNO2 in INSN's basic block until
17865 1. Pass LEA_SEARCH_THRESHOLD instructions, or
17866 2. Reach neighbour BBs boundary, or
17867 3. Reach agu definition.
17868 Returns the distance between the non-agu definition point and INSN.
17869 If no definition point, returns -1. */
17871 static int
17872 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
17873 rtx insn)
17875 basic_block bb = BLOCK_FOR_INSN (insn);
17876 int distance = 0;
17877 bool found = false;
17879 if (insn != BB_HEAD (bb))
17880 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
17881 distance, PREV_INSN (insn),
17882 &found);
17884 if (!found && distance < LEA_SEARCH_THRESHOLD)
17886 edge e;
17887 edge_iterator ei;
17888 bool simple_loop = false;
17890 FOR_EACH_EDGE (e, ei, bb->preds)
17891 if (e->src == bb)
17893 simple_loop = true;
17894 break;
17897 if (simple_loop)
17898 distance = distance_non_agu_define_in_bb (regno1, regno2,
17899 insn, distance,
17900 BB_END (bb), &found);
17901 else
17903 int shortest_dist = -1;
17904 bool found_in_bb = false;
17906 FOR_EACH_EDGE (e, ei, bb->preds)
17908 int bb_dist
17909 = distance_non_agu_define_in_bb (regno1, regno2,
17910 insn, distance,
17911 BB_END (e->src),
17912 &found_in_bb);
17913 if (found_in_bb)
17915 if (shortest_dist < 0)
17916 shortest_dist = bb_dist;
17917 else if (bb_dist > 0)
17918 shortest_dist = MIN (bb_dist, shortest_dist);
17920 found = true;
17924 distance = shortest_dist;
17928 /* get_attr_type may modify recog data. We want to make sure
17929 that recog data is valid for instruction INSN, on which
17930 distance_non_agu_define is called. INSN is unchanged here. */
17931 extract_insn_cached (insn);
17933 if (!found)
17934 return -1;
17936 return distance >> 1;
17939 /* Return the distance in half-cycles between INSN and the next
17940 insn that uses register number REGNO in a memory address, added
17941 to DISTANCE. Return -1 if REGNO is set.
17943 Put true value into *FOUND if register usage was found and
17944 false otherwise.
17945 Put true value into *REDEFINED if register redefinition was
17946 found and false otherwise. */
17948 static int
17949 distance_agu_use_in_bb (unsigned int regno,
17950 rtx insn, int distance, rtx start,
17951 bool *found, bool *redefined)
17953 basic_block bb = NULL;
17954 rtx next = start;
17955 rtx prev = NULL;
17957 *found = false;
17958 *redefined = false;
17960 if (start != NULL_RTX)
17962 bb = BLOCK_FOR_INSN (start);
17963 if (start != BB_HEAD (bb))
17964 /* If insn and start belong to the same bb, set prev to insn,
17965 so the call to increase_distance will increase the distance
17966 between insns by 1. */
17967 prev = insn;
17970 while (next
17971 && next != insn
17972 && distance < LEA_SEARCH_THRESHOLD)
17974 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
17976 distance = increase_distance(prev, next, distance);
17977 if (insn_uses_reg_mem (regno, next))
17979 /* Return DISTANCE if OP0 is used in memory
17980 address in NEXT. */
17981 *found = true;
17982 return distance;
17985 if (insn_defines_reg (regno, INVALID_REGNUM, next))
17987 /* Return -1 if OP0 is set in NEXT. */
17988 *redefined = true;
17989 return -1;
17992 prev = next;
17995 if (next == BB_END (bb))
17996 break;
17998 next = NEXT_INSN (next);
18001 return distance;
18004 /* Return the distance between INSN and the next insn that uses
18005 register number REGNO0 in a memory address. Return -1 if no such
18006 use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
18008 static int
18009 distance_agu_use (unsigned int regno0, rtx insn)
18011 basic_block bb = BLOCK_FOR_INSN (insn);
18012 int distance = 0;
18013 bool found = false;
18014 bool redefined = false;
18016 if (insn != BB_END (bb))
18017 distance = distance_agu_use_in_bb (regno0, insn, distance,
18018 NEXT_INSN (insn),
18019 &found, &redefined);
18021 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
18023 edge e;
18024 edge_iterator ei;
18025 bool simple_loop = false;
18027 FOR_EACH_EDGE (e, ei, bb->succs)
18028 if (e->dest == bb)
18030 simple_loop = true;
18031 break;
18034 if (simple_loop)
18035 distance = distance_agu_use_in_bb (regno0, insn,
18036 distance, BB_HEAD (bb),
18037 &found, &redefined);
18038 else
18040 int shortest_dist = -1;
18041 bool found_in_bb = false;
18042 bool redefined_in_bb = false;
18044 FOR_EACH_EDGE (e, ei, bb->succs)
18046 int bb_dist
18047 = distance_agu_use_in_bb (regno0, insn,
18048 distance, BB_HEAD (e->dest),
18049 &found_in_bb, &redefined_in_bb);
18050 if (found_in_bb)
18052 if (shortest_dist < 0)
18053 shortest_dist = bb_dist;
18054 else if (bb_dist > 0)
18055 shortest_dist = MIN (bb_dist, shortest_dist);
18057 found = true;
18061 distance = shortest_dist;
18065 if (!found || redefined)
18066 return -1;
18068 return distance >> 1;
18071 /* Define this macro to tune LEA priority vs ADD; it takes effect when
18072 there is a dilemma of choosing between LEA and ADD.
18073 Negative value: ADD is preferred over LEA
18074 Zero: neutral
18075 Positive value: LEA is preferred over ADD. */
18076 #define IX86_LEA_PRIORITY 0
18078 /* Return true if use of the lea INSN has a performance advantage
18079 over a sequence of instructions. The instruction sequence has
18080 SPLIT_COST cycles higher latency than the lea itself. */
18082 static bool
18083 ix86_lea_outperforms (rtx insn, unsigned int regno0, unsigned int regno1,
18084 unsigned int regno2, int split_cost, bool has_scale)
18086 int dist_define, dist_use;
18088 /* For Silvermont, if we are using a 2-source or 3-source LEA for
18089 non-destructive destination purposes, or because we want the
18090 ability to use SCALE, the use of LEA is justified. */
18091 if (TARGET_SILVERMONT || TARGET_INTEL)
18093 if (has_scale)
18094 return true;
18095 if (split_cost < 1)
18096 return false;
18097 if (regno0 == regno1 || regno0 == regno2)
18098 return false;
18099 return true;
18102 dist_define = distance_non_agu_define (regno1, regno2, insn);
18103 dist_use = distance_agu_use (regno0, insn);
18105 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
18107 /* If there is no non-AGU operand definition, no AGU
18108 operand usage and the split cost is 0, then both the lea
18109 and non-lea variants have the same priority. Currently
18110 we prefer lea for 64-bit code and non-lea for 32-bit
18111 code. */
18112 if (dist_use < 0 && split_cost == 0)
18113 return TARGET_64BIT || IX86_LEA_PRIORITY;
18114 else
18115 return true;
18118 /* The longer the distance to the definition, the more preferable
18119 lea is. Here we adjust the distance to take into account
18120 splitting cost and lea priority. */
18121 dist_define += split_cost + IX86_LEA_PRIORITY;
18123 /* If there is no use in a memory address then we just check
18124 that split cost exceeds AGU stall. */
18125 if (dist_use < 0)
18126 return dist_define > LEA_MAX_STALL;
18128 /* If this insn has both backward non-agu dependence and forward
18129 agu dependence, the one with short distance takes effect. */
18130 return dist_define >= dist_use;
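/* Worked example of the comparison above (illustrative numbers): with a
   backward non-AGU definition 2 half-cycles away, a split cost of 1 and
   IX86_LEA_PRIORITY of 0, the adjusted define distance is 3; if the next
   AGU use is 4 half-cycles away, 3 >= 4 is false, so the lea is split.  */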
18133 /* Return true if it is legal to clobber flags by INSN and
18134 false otherwise. */
18136 static bool
18137 ix86_ok_to_clobber_flags (rtx insn)
18139 basic_block bb = BLOCK_FOR_INSN (insn);
18140 df_ref *use;
18141 bitmap live;
18143 while (insn)
18145 if (NONDEBUG_INSN_P (insn))
18147 for (use = DF_INSN_USES (insn); *use; use++)
18148 if (DF_REF_REG_USE_P (*use) && DF_REF_REGNO (*use) == FLAGS_REG)
18149 return false;
18151 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
18152 return true;
18155 if (insn == BB_END (bb))
18156 break;
18158 insn = NEXT_INSN (insn);
18161 live = df_get_live_out(bb);
18162 return !REGNO_REG_SET_P (live, FLAGS_REG);
18165 /* Return true if we need to split op0 = op1 + op2 into a sequence of
18166 move and add to avoid AGU stalls. */
18168 bool
18169 ix86_avoid_lea_for_add (rtx insn, rtx operands[])
18171 unsigned int regno0, regno1, regno2;
18173 /* Check if we need to optimize. */
18174 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18175 return false;
18177 /* Check it is correct to split here. */
18178 if (!ix86_ok_to_clobber_flags(insn))
18179 return false;
18181 regno0 = true_regnum (operands[0]);
18182 regno1 = true_regnum (operands[1]);
18183 regno2 = true_regnum (operands[2]);
18185 /* We only need to split adds with a non-destructive
18186 destination operand. */
18187 if (regno0 == regno1 || regno0 == regno2)
18188 return false;
18189 else
18190 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
18193 /* Return true if we should emit lea instruction instead of mov
18194 instruction. */
18196 bool
18197 ix86_use_lea_for_mov (rtx insn, rtx operands[])
18199 unsigned int regno0, regno1;
18201 /* Check if we need to optimize. */
18202 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18203 return false;
18205 /* Use lea for reg to reg moves only. */
18206 if (!REG_P (operands[0]) || !REG_P (operands[1]))
18207 return false;
18209 regno0 = true_regnum (operands[0]);
18210 regno1 = true_regnum (operands[1]);
18212 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
18215 /* Return true if we need to split lea into a sequence of
18216 instructions to avoid AGU stalls. */
18218 bool
18219 ix86_avoid_lea_for_addr (rtx insn, rtx operands[])
18221 unsigned int regno0, regno1, regno2;
18222 int split_cost;
18223 struct ix86_address parts;
18224 int ok;
18226 /* Check we need to optimize. */
18227 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
18228 return false;
18230 /* The "at least two components" test below might not catch simple
18231 move or zero extension insns if parts.base is non-NULL and parts.disp
18232 is const0_rtx as the only components in the address, e.g. if the
18233 register is %rbp or %r13. As this test is much cheaper and moves or
18234 zero extensions are the common case, do this check first. */
18235 if (REG_P (operands[1])
18236 || (SImode_address_operand (operands[1], VOIDmode)
18237 && REG_P (XEXP (operands[1], 0))))
18238 return false;
18240 /* Check if it is OK to split here. */
18241 if (!ix86_ok_to_clobber_flags (insn))
18242 return false;
18244 ok = ix86_decompose_address (operands[1], &parts);
18245 gcc_assert (ok);
18247 /* There should be at least two components in the address. */
18248 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
18249 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
18250 return false;
18252 /* We should not split into add if a non-legitimate PIC
18253 operand is used as the displacement. */
18254 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
18255 return false;
18257 regno0 = true_regnum (operands[0]) ;
18258 regno1 = INVALID_REGNUM;
18259 regno2 = INVALID_REGNUM;
18261 if (parts.base)
18262 regno1 = true_regnum (parts.base);
18263 if (parts.index)
18264 regno2 = true_regnum (parts.index);
18266 split_cost = 0;
18268 /* Compute how many cycles we will add to execution time
18269 if we split the lea into a sequence of instructions. */
18270 if (parts.base || parts.index)
18272 /* Have to use a mov instruction if the non-destructive
18273 destination form is used. */
18274 if (regno1 != regno0 && regno2 != regno0)
18275 split_cost += 1;
18277 /* Have to add index to base if both exist. */
18278 if (parts.base && parts.index)
18279 split_cost += 1;
18281 /* Have to use shift and adds if scale is 2 or greater. */
18282 if (parts.scale > 1)
18284 if (regno0 != regno1)
18285 split_cost += 1;
18286 else if (regno2 == regno0)
18287 split_cost += 4;
18288 else
18289 split_cost += parts.scale;
18292 /* Have to use add instruction with immediate if
18293 disp is non zero. */
18294 if (parts.disp && parts.disp != const0_rtx)
18295 split_cost += 1;
18297 /* Subtract the price of lea. */
18298 split_cost -= 1;
18301 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
18302 parts.scale > 1);
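/* Worked example of the split-cost computation above (illustrative):
   for "lea 4(%rbx,%rcx,2), %rax" all three registers differ, so the
   cost is 1 (mov) + 1 (add of base and index) + 1 (shift for scale 2)
   + 1 (add of the displacement) - 1 (the lea itself) = 3, which is then
   weighed against the AGU/ALU distances by ix86_lea_outperforms.  */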
18305 /* Emit x86 binary operand CODE in mode MODE, where the first operand
18306 matches destination. RTX includes clobber of FLAGS_REG. */
18308 static void
18309 ix86_emit_binop (enum rtx_code code, enum machine_mode mode,
18310 rtx dst, rtx src)
18312 rtx op, clob;
18314 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src));
18315 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18317 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18320 /* Return true if regno1 def is nearest to the insn. */
18322 static bool
18323 find_nearest_reg_def (rtx insn, int regno1, int regno2)
18325 rtx prev = insn;
18326 rtx start = BB_HEAD (BLOCK_FOR_INSN (insn));
18328 if (insn == start)
18329 return false;
18330 while (prev && prev != start)
18332 if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
18334 prev = PREV_INSN (prev);
18335 continue;
18337 if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
18338 return true;
18339 else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
18340 return false;
18341 prev = PREV_INSN (prev);
18344 /* None of the regs is defined in the bb. */
18345 return false;
18348 /* Split lea instructions into a sequence of instructions
18349 which are executed on the ALU to avoid AGU stalls.
18350 It is assumed that clobbering the flags register
18351 at the lea position is allowed. */
18353 void
18354 ix86_split_lea_for_addr (rtx insn, rtx operands[], enum machine_mode mode)
18356 unsigned int regno0, regno1, regno2;
18357 struct ix86_address parts;
18358 rtx target, tmp;
18359 int ok, adds;
18361 ok = ix86_decompose_address (operands[1], &parts);
18362 gcc_assert (ok);
18364 target = gen_lowpart (mode, operands[0]);
18366 regno0 = true_regnum (target);
18367 regno1 = INVALID_REGNUM;
18368 regno2 = INVALID_REGNUM;
18370 if (parts.base)
18372 parts.base = gen_lowpart (mode, parts.base);
18373 regno1 = true_regnum (parts.base);
18376 if (parts.index)
18378 parts.index = gen_lowpart (mode, parts.index);
18379 regno2 = true_regnum (parts.index);
18382 if (parts.disp)
18383 parts.disp = gen_lowpart (mode, parts.disp);
18385 if (parts.scale > 1)
18387 /* Case r1 = r1 + ... */
18388 if (regno1 == regno0)
18390 /* If we have the case r1 = r1 + C * r2 then we
18391 would have to use multiplication, which is very
18392 expensive. Assume the cost model is wrong if such
18393 a case reaches here. */
18394 gcc_assert (regno2 != regno0);
18396 for (adds = parts.scale; adds > 0; adds--)
18397 ix86_emit_binop (PLUS, mode, target, parts.index);
18399 else
18401 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
18402 if (regno0 != regno2)
18403 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18405 /* Use shift for scaling. */
18406 ix86_emit_binop (ASHIFT, mode, target,
18407 GEN_INT (exact_log2 (parts.scale)));
18409 if (parts.base)
18410 ix86_emit_binop (PLUS, mode, target, parts.base);
18412 if (parts.disp && parts.disp != const0_rtx)
18413 ix86_emit_binop (PLUS, mode, target, parts.disp);
18416 else if (!parts.base && !parts.index)
18418 gcc_assert(parts.disp);
18419 emit_insn (gen_rtx_SET (VOIDmode, target, parts.disp));
18421 else
18423 if (!parts.base)
18425 if (regno0 != regno2)
18426 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18428 else if (!parts.index)
18430 if (regno0 != regno1)
18431 emit_insn (gen_rtx_SET (VOIDmode, target, parts.base));
18433 else
18435 if (regno0 == regno1)
18436 tmp = parts.index;
18437 else if (regno0 == regno2)
18438 tmp = parts.base;
18439 else
18441 rtx tmp1;
18443 /* Find better operand for SET instruction, depending
18444 on which definition is farther from the insn. */
18445 if (find_nearest_reg_def (insn, regno1, regno2))
18446 tmp = parts.index, tmp1 = parts.base;
18447 else
18448 tmp = parts.base, tmp1 = parts.index;
18450 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18452 if (parts.disp && parts.disp != const0_rtx)
18453 ix86_emit_binop (PLUS, mode, target, parts.disp);
18455 ix86_emit_binop (PLUS, mode, target, tmp1);
18456 return;
18459 ix86_emit_binop (PLUS, mode, target, tmp);
18462 if (parts.disp && parts.disp != const0_rtx)
18463 ix86_emit_binop (PLUS, mode, target, parts.disp);
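/* A sketch of a typical split produced by the code above (register names
   illustrative): "lea 4(%rbx,%rcx,4), %rax" with all registers distinct
   becomes roughly

       movq    %rcx, %rax        # copy the index
       salq    $2, %rax          # shift for scale 4
       addq    %rbx, %rax        # add the base
       addq    $4, %rax          # add the displacement

   executed on the ALU instead of the AGU.  */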
18467 /* Return true if it is ok to optimize an ADD operation to LEA
18468 operation to avoid flag register consumption. For most processors,
18469 ADD is faster than LEA. For the processors like BONNELL, if the
18470 destination register of LEA holds an actual address which will be
18471 used soon, LEA is better and otherwise ADD is better. */
18473 bool
18474 ix86_lea_for_add_ok (rtx insn, rtx operands[])
18476 unsigned int regno0 = true_regnum (operands[0]);
18477 unsigned int regno1 = true_regnum (operands[1]);
18478 unsigned int regno2 = true_regnum (operands[2]);
18480 /* If a = b + c, (a!=b && a!=c), must use lea form. */
18481 if (regno0 != regno1 && regno0 != regno2)
18482 return true;
18484 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18485 return false;
18487 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
18490 /* Return true if destination reg of SET_BODY is shift count of
18491 USE_BODY. */
18493 static bool
18494 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
18496 rtx set_dest;
18497 rtx shift_rtx;
18498 int i;
18500 /* Retrieve destination of SET_BODY. */
18501 switch (GET_CODE (set_body))
18503 case SET:
18504 set_dest = SET_DEST (set_body);
18505 if (!set_dest || !REG_P (set_dest))
18506 return false;
18507 break;
18508 case PARALLEL:
18509 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
18510 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
18511 use_body))
18512 return true;
18513 default:
18514 return false;
18515 break;
18518 /* Retrieve shift count of USE_BODY. */
18519 switch (GET_CODE (use_body))
18521 case SET:
18522 shift_rtx = XEXP (use_body, 1);
18523 break;
18524 case PARALLEL:
18525 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
18526 if (ix86_dep_by_shift_count_body (set_body,
18527 XVECEXP (use_body, 0, i)))
18528 return true;
18529 default:
18530 return false;
18531 break;
18534 if (shift_rtx
18535 && (GET_CODE (shift_rtx) == ASHIFT
18536 || GET_CODE (shift_rtx) == LSHIFTRT
18537 || GET_CODE (shift_rtx) == ASHIFTRT
18538 || GET_CODE (shift_rtx) == ROTATE
18539 || GET_CODE (shift_rtx) == ROTATERT))
18541 rtx shift_count = XEXP (shift_rtx, 1);
18543 /* Return true if shift count is dest of SET_BODY. */
18544 if (REG_P (shift_count))
18546 /* Add a check since this can be invoked before register
18547 allocation by the pre-reload scheduler. */
18548 if (reload_completed
18549 && true_regnum (set_dest) == true_regnum (shift_count))
18550 return true;
18551 else if (REGNO(set_dest) == REGNO(shift_count))
18552 return true;
18556 return false;
18559 /* Return true if destination reg of SET_INSN is shift count of
18560 USE_INSN. */
18562 bool
18563 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
18565 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
18566 PATTERN (use_insn));
18569 /* Return TRUE or FALSE depending on whether the unary operator meets the
18570 appropriate constraints. */
18572 bool
18573 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
18574 enum machine_mode mode ATTRIBUTE_UNUSED,
18575 rtx operands[2])
18577 /* If one of operands is memory, source and destination must match. */
18578 if ((MEM_P (operands[0])
18579 || MEM_P (operands[1]))
18580 && ! rtx_equal_p (operands[0], operands[1]))
18581 return false;
18582 return true;
18585 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
18586 are ok, keeping in mind the possible movddup alternative. */
18588 bool
18589 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
18591 if (MEM_P (operands[0]))
18592 return rtx_equal_p (operands[0], operands[1 + high]);
18593 if (MEM_P (operands[1]) && MEM_P (operands[2]))
18594 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
18595 return true;
18598 /* Post-reload splitter for converting an SF or DFmode value in an
18599 SSE register into an unsigned SImode. */
18601 void
18602 ix86_split_convert_uns_si_sse (rtx operands[])
18604 enum machine_mode vecmode;
18605 rtx value, large, zero_or_two31, input, two31, x;
18607 large = operands[1];
18608 zero_or_two31 = operands[2];
18609 input = operands[3];
18610 two31 = operands[4];
18611 vecmode = GET_MODE (large);
18612 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
18614 /* Load up the value into the low element. We must ensure that the other
18615 elements are valid floats -- zero is the easiest such value. */
18616 if (MEM_P (input))
18618 if (vecmode == V4SFmode)
18619 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
18620 else
18621 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
18623 else
18625 input = gen_rtx_REG (vecmode, REGNO (input));
18626 emit_move_insn (value, CONST0_RTX (vecmode));
18627 if (vecmode == V4SFmode)
18628 emit_insn (gen_sse_movss (value, value, input));
18629 else
18630 emit_insn (gen_sse2_movsd (value, value, input));
18633 emit_move_insn (large, two31);
18634 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
18636 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
18637 emit_insn (gen_rtx_SET (VOIDmode, large, x));
18639 x = gen_rtx_AND (vecmode, zero_or_two31, large);
18640 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
18642 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
18643 emit_insn (gen_rtx_SET (VOIDmode, value, x));
18645 large = gen_rtx_REG (V4SImode, REGNO (large));
18646 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
18648 x = gen_rtx_REG (V4SImode, REGNO (value));
18649 if (vecmode == V4SFmode)
18650 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
18651 else
18652 emit_insn (gen_sse2_cvttpd2dq (x, value));
18653 value = x;
18655 emit_insn (gen_xorv4si3 (value, value, large));
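/* Scalar sketch of the trick performed above (an illustration, assuming a
   DFmode input; the vector code does the same element-wise):

       if (x >= 0x1.0p31)
         result = (uint32_t) (int32_t) (x - 0x1.0p31) ^ 0x80000000u;
       else
         result = (uint32_t) (int32_t) x;

   i.e. values with the 2**31 bit set are reduced into signed range before
   the truncating conversion and the bit is restored with an XOR.  */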
18658 /* Convert an unsigned DImode value into a DFmode, using only SSE.
18659 Expects the 64-bit DImode to be supplied in a pair of integral
18660 registers. Requires SSE2; will use SSE3 if available. For x86_32,
18661 -mfpmath=sse, !optimize_size only. */
18663 void
18664 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
18666 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
18667 rtx int_xmm, fp_xmm;
18668 rtx biases, exponents;
18669 rtx x;
18671 int_xmm = gen_reg_rtx (V4SImode);
18672 if (TARGET_INTER_UNIT_MOVES_TO_VEC)
18673 emit_insn (gen_movdi_to_sse (int_xmm, input));
18674 else if (TARGET_SSE_SPLIT_REGS)
18676 emit_clobber (int_xmm);
18677 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
18679 else
18681 x = gen_reg_rtx (V2DImode);
18682 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
18683 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
18686 x = gen_rtx_CONST_VECTOR (V4SImode,
18687 gen_rtvec (4, GEN_INT (0x43300000UL),
18688 GEN_INT (0x45300000UL),
18689 const0_rtx, const0_rtx));
18690 exponents = validize_mem (force_const_mem (V4SImode, x));
18692 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
18693 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
18695 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_lo_xmm)
18696 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
18697 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
18698 (0x1.0p84 + double(fp_value_hi_xmm)).
18699 Note these exponents differ by 32. */
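/* As a worked example (illustrative input): for the 64-bit value
   0x0000000200000005, the low half juxtaposed with 0x43300000 is the
   double 0x1.0p52 + 5.0, and the high half juxtaposed with 0x45300000
   is the double 0x1.0p84 + 2.0 * 0x1.0p32.  After the bias subtraction
   below the two lanes hold 5.0 and 2.0 * 0x1.0p32, and adding them
   gives the DFmode result for the whole unsigned input.  */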
18701 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
18703 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
18704 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
18705 real_ldexp (&bias_lo_rvt, &dconst1, 52);
18706 real_ldexp (&bias_hi_rvt, &dconst1, 84);
18707 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
18708 x = const_double_from_real_value (bias_hi_rvt, DFmode);
18709 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
18710 biases = validize_mem (force_const_mem (V2DFmode, biases));
18711 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
18713 /* Add the upper and lower DFmode values together. */
18714 if (TARGET_SSE3)
18715 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
18716 else
18718 x = copy_to_mode_reg (V2DFmode, fp_xmm);
18719 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
18720 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
18723 ix86_expand_vector_extract (false, target, fp_xmm, 0);
18726 /* Not used, but eases macroization of patterns. */
18727 void
18728 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
18729 rtx input ATTRIBUTE_UNUSED)
18731 gcc_unreachable ();
18734 /* Convert an unsigned SImode value into a DFmode.  Currently only used
18735 for SSE, but applicable anywhere. */
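/* The trick used below: adding -2**31 to the unsigned input flips its
   sign bit, producing the signed SImode value input - 2**31.  After the
   signed int-to-float conversion we add 2**31.0 back in the FP domain.
   For example (illustrative value), input = 5 becomes the signed value
   -2147483643; converting to DFmode and adding 2147483648.0 yields 5.0.  */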
18737 void
18738 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
18740 REAL_VALUE_TYPE TWO31r;
18741 rtx x, fp;
18743 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
18744 NULL, 1, OPTAB_DIRECT);
18746 fp = gen_reg_rtx (DFmode);
18747 emit_insn (gen_floatsidf2 (fp, x));
18749 real_ldexp (&TWO31r, &dconst1, 31);
18750 x = const_double_from_real_value (TWO31r, DFmode);
18752 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
18753 if (x != target)
18754 emit_move_insn (target, x);
18757 /* Convert a signed DImode value into a DFmode. Only used for SSE in
18758 32-bit mode; otherwise we have a direct convert instruction. */
18760 void
18761 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
18763 REAL_VALUE_TYPE TWO32r;
18764 rtx fp_lo, fp_hi, x;
18766 fp_lo = gen_reg_rtx (DFmode);
18767 fp_hi = gen_reg_rtx (DFmode);
18769 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
18771 real_ldexp (&TWO32r, &dconst1, 32);
18772 x = const_double_from_real_value (TWO32r, DFmode);
18773 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
18775 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
18777 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
18778 0, OPTAB_DIRECT);
18779 if (x != target)
18780 emit_move_insn (target, x);
18783 /* Convert an unsigned SImode value into a SFmode, using only SSE.
18784 For x86_32, -mfpmath=sse, !optimize_size only. */
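/* The sequence below splits the 32-bit input into its low and high
   16-bit halves, converts each half with the signed SImode->SFmode
   pattern (both halves are non-negative, so that is safe), and then
   recombines them as hi * 2**16 + lo in SFmode.  For example
   (illustrative value), input = 0xDEAD0003 gives lo = 3 and
   hi = 0xDEAD = 57005, so the result is 57005.0f * 65536.0f + 3.0f.  */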
18785 void
18786 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
18788 REAL_VALUE_TYPE ONE16r;
18789 rtx fp_hi, fp_lo, int_hi, int_lo, x;
18791 real_ldexp (&ONE16r, &dconst1, 16);
18792 x = const_double_from_real_value (ONE16r, SFmode);
18793 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
18794 NULL, 0, OPTAB_DIRECT);
18795 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
18796 NULL, 0, OPTAB_DIRECT);
18797 fp_hi = gen_reg_rtx (SFmode);
18798 fp_lo = gen_reg_rtx (SFmode);
18799 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
18800 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
18801 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
18802 0, OPTAB_DIRECT);
18803 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
18804 0, OPTAB_DIRECT);
18805 if (!rtx_equal_p (target, fp_hi))
18806 emit_move_insn (target, fp_hi);
18809 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
18810 a vector of unsigned ints VAL to vector of floats TARGET. */
18812 void
18813 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
18815 rtx tmp[8];
18816 REAL_VALUE_TYPE TWO16r;
18817 enum machine_mode intmode = GET_MODE (val);
18818 enum machine_mode fltmode = GET_MODE (target);
18819 rtx (*cvt) (rtx, rtx);
18821 if (intmode == V4SImode)
18822 cvt = gen_floatv4siv4sf2;
18823 else
18824 cvt = gen_floatv8siv8sf2;
18825 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
18826 tmp[0] = force_reg (intmode, tmp[0]);
18827 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
18828 OPTAB_DIRECT);
18829 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
18830 NULL_RTX, 1, OPTAB_DIRECT);
18831 tmp[3] = gen_reg_rtx (fltmode);
18832 emit_insn (cvt (tmp[3], tmp[1]));
18833 tmp[4] = gen_reg_rtx (fltmode);
18834 emit_insn (cvt (tmp[4], tmp[2]));
18835 real_ldexp (&TWO16r, &dconst1, 16);
18836 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
18837 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
18838 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
18839 OPTAB_DIRECT);
18840 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
18841 OPTAB_DIRECT);
18842 if (tmp[7] != target)
18843 emit_move_insn (target, tmp[7]);
18846 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
18847 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
18848 This is done by doing just signed conversion if < 0x1p31, and otherwise by
18849 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
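/* Concretely, the code below builds a mask of the lanes where
   VAL >= 0x1p31, subtracts 0x1p31 from exactly those lanes, and hands
   back (via *XORP) an integer-mode vector with 0x80000000 set in the
   same lanes, to be xored into the signed truncation result by the
   caller.  For example (illustrative lane values), a lane holding 3e9
   ends up converted as (int) (3e9 - 0x1p31) ^ 0x80000000, while a lane
   holding 100.0 takes the plain signed path.  */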
18852 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
18854 REAL_VALUE_TYPE TWO31r;
18855 rtx two31r, tmp[4];
18856 enum machine_mode mode = GET_MODE (val);
18857 enum machine_mode scalarmode = GET_MODE_INNER (mode);
18858 enum machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
18859 rtx (*cmp) (rtx, rtx, rtx, rtx);
18860 int i;
18862 for (i = 0; i < 3; i++)
18863 tmp[i] = gen_reg_rtx (mode);
18864 real_ldexp (&TWO31r, &dconst1, 31);
18865 two31r = const_double_from_real_value (TWO31r, scalarmode);
18866 two31r = ix86_build_const_vector (mode, 1, two31r);
18867 two31r = force_reg (mode, two31r);
18868 switch (mode)
18870 case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
18871 case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
18872 case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
18873 case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
18874 default: gcc_unreachable ();
18876 tmp[3] = gen_rtx_LE (mode, two31r, val);
18877 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
18878 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
18879 0, OPTAB_DIRECT);
18880 if (intmode == V4SImode || TARGET_AVX2)
18881 *xorp = expand_simple_binop (intmode, ASHIFT,
18882 gen_lowpart (intmode, tmp[0]),
18883 GEN_INT (31), NULL_RTX, 0,
18884 OPTAB_DIRECT);
18885 else
18887 rtx two31 = GEN_INT ((unsigned HOST_WIDE_INT) 1 << 31);
18888 two31 = ix86_build_const_vector (intmode, 1, two31);
18889 *xorp = expand_simple_binop (intmode, AND,
18890 gen_lowpart (intmode, tmp[0]),
18891 two31, NULL_RTX, 0,
18892 OPTAB_DIRECT);
18894 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
18895 0, OPTAB_DIRECT);
18898 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
18899 then replicate the value for all elements of the vector
18900 register. */
18903 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
18905 int i, n_elt;
18906 rtvec v;
18907 enum machine_mode scalar_mode;
18909 switch (mode)
18911 case V64QImode:
18912 case V32QImode:
18913 case V16QImode:
18914 case V32HImode:
18915 case V16HImode:
18916 case V8HImode:
18917 case V16SImode:
18918 case V8SImode:
18919 case V4SImode:
18920 case V8DImode:
18921 case V4DImode:
18922 case V2DImode:
18923 gcc_assert (vect);
18924 case V16SFmode:
18925 case V8SFmode:
18926 case V4SFmode:
18927 case V8DFmode:
18928 case V4DFmode:
18929 case V2DFmode:
18930 n_elt = GET_MODE_NUNITS (mode);
18931 v = rtvec_alloc (n_elt);
18932 scalar_mode = GET_MODE_INNER (mode);
18934 RTVEC_ELT (v, 0) = value;
18936 for (i = 1; i < n_elt; ++i)
18937 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
18939 return gen_rtx_CONST_VECTOR (mode, v);
18941 default:
18942 gcc_unreachable ();
18946 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
18947 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
18948 for an SSE register. If VECT is true, then replicate the mask for
18949 all elements of the vector register. If INVERT is true, then create
18950 a mask excluding the sign bit. */
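/* For example (illustrative modes): for V4SFmode with VECT true and
   INVERT false this builds { 0x80000000, 0x80000000, 0x80000000,
   0x80000000 } viewed as floats, and with INVERT true four copies of
   0x7fffffff; for the DFmode elements of V2DFmode and friends the
   per-element constants are 0x8000000000000000 and 0x7fffffffffffffff.
   With VECT false only the low element carries the mask and the rest
   are zero.  */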
18953 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
18955 enum machine_mode vec_mode, imode;
18956 HOST_WIDE_INT hi, lo;
18957 int shift = 63;
18958 rtx v;
18959 rtx mask;
18961 /* Find the sign bit, sign extended to 2*HWI. */
18962 switch (mode)
18964 case V16SImode:
18965 case V16SFmode:
18966 case V8SImode:
18967 case V4SImode:
18968 case V8SFmode:
18969 case V4SFmode:
18970 vec_mode = mode;
18971 mode = GET_MODE_INNER (mode);
18972 imode = SImode;
18973 lo = 0x80000000, hi = lo < 0;
18974 break;
18976 case V8DImode:
18977 case V4DImode:
18978 case V2DImode:
18979 case V8DFmode:
18980 case V4DFmode:
18981 case V2DFmode:
18982 vec_mode = mode;
18983 mode = GET_MODE_INNER (mode);
18984 imode = DImode;
18985 if (HOST_BITS_PER_WIDE_INT >= 64)
18986 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
18987 else
18988 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
18989 break;
18991 case TImode:
18992 case TFmode:
18993 vec_mode = VOIDmode;
18994 if (HOST_BITS_PER_WIDE_INT >= 64)
18996 imode = TImode;
18997 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
18999 else
19001 rtvec vec;
19003 imode = DImode;
19004 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
19006 if (invert)
19008 lo = ~lo, hi = ~hi;
19009 v = constm1_rtx;
19011 else
19012 v = const0_rtx;
19014 mask = immed_double_const (lo, hi, imode);
19016 vec = gen_rtvec (2, v, mask);
19017 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
19018 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
19020 return v;
19022 break;
19024 default:
19025 gcc_unreachable ();
19028 if (invert)
19029 lo = ~lo, hi = ~hi;
19031 /* Force this value into the low part of a fp vector constant. */
19032 mask = immed_double_const (lo, hi, imode);
19033 mask = gen_lowpart (mode, mask);
19035 if (vec_mode == VOIDmode)
19036 return force_reg (mode, mask);
19038 v = ix86_build_const_vector (vec_mode, vect, mask);
19039 return force_reg (vec_mode, v);
19042 /* Generate code for floating point ABS or NEG. */
19044 void
19045 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
19046 rtx operands[])
19048 rtx mask, set, dst, src;
19049 bool use_sse = false;
19050 bool vector_mode = VECTOR_MODE_P (mode);
19051 enum machine_mode vmode = mode;
19053 if (vector_mode)
19054 use_sse = true;
19055 else if (mode == TFmode)
19056 use_sse = true;
19057 else if (TARGET_SSE_MATH)
19059 use_sse = SSE_FLOAT_MODE_P (mode);
19060 if (mode == SFmode)
19061 vmode = V4SFmode;
19062 else if (mode == DFmode)
19063 vmode = V2DFmode;
19066 /* NEG and ABS performed with SSE use bitwise mask operations.
19067 Create the appropriate mask now. */
19068 if (use_sse)
19069 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
19070 else
19071 mask = NULL_RTX;
19073 dst = operands[0];
19074 src = operands[1];
19076 set = gen_rtx_fmt_e (code, mode, src);
19077 set = gen_rtx_SET (VOIDmode, dst, set);
19079 if (mask)
19081 rtx use, clob;
19082 rtvec par;
19084 use = gen_rtx_USE (VOIDmode, mask);
19085 if (vector_mode)
19086 par = gen_rtvec (2, set, use);
19087 else
19089 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19090 par = gen_rtvec (3, set, use, clob);
19092 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
19094 else
19095 emit_insn (set);
19098 /* Expand a copysign operation. Special case operand 0 being a constant. */
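/* The masks requested below implement the usual bit-level identity
   (a sketch, in C-like notation):

     copysign (x, y) = (x & ~SIGNMASK) | (y & SIGNMASK)

   where SIGNMASK has only the sign bit set in each element.  When X is
   a constant we can pre-apply fabs to it and drop the ~SIGNMASK mask,
   which is why the constant case only needs a single mask.  */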
19100 void
19101 ix86_expand_copysign (rtx operands[])
19103 enum machine_mode mode, vmode;
19104 rtx dest, op0, op1, mask, nmask;
19106 dest = operands[0];
19107 op0 = operands[1];
19108 op1 = operands[2];
19110 mode = GET_MODE (dest);
19112 if (mode == SFmode)
19113 vmode = V4SFmode;
19114 else if (mode == DFmode)
19115 vmode = V2DFmode;
19116 else
19117 vmode = mode;
19119 if (GET_CODE (op0) == CONST_DOUBLE)
19121 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
19123 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
19124 op0 = simplify_unary_operation (ABS, mode, op0, mode);
19126 if (mode == SFmode || mode == DFmode)
19128 if (op0 == CONST0_RTX (mode))
19129 op0 = CONST0_RTX (vmode);
19130 else
19132 rtx v = ix86_build_const_vector (vmode, false, op0);
19134 op0 = force_reg (vmode, v);
19137 else if (op0 != CONST0_RTX (mode))
19138 op0 = force_reg (mode, op0);
19140 mask = ix86_build_signbit_mask (vmode, 0, 0);
19142 if (mode == SFmode)
19143 copysign_insn = gen_copysignsf3_const;
19144 else if (mode == DFmode)
19145 copysign_insn = gen_copysigndf3_const;
19146 else
19147 copysign_insn = gen_copysigntf3_const;
19149 emit_insn (copysign_insn (dest, op0, op1, mask));
19151 else
19153 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
19155 nmask = ix86_build_signbit_mask (vmode, 0, 1);
19156 mask = ix86_build_signbit_mask (vmode, 0, 0);
19158 if (mode == SFmode)
19159 copysign_insn = gen_copysignsf3_var;
19160 else if (mode == DFmode)
19161 copysign_insn = gen_copysigndf3_var;
19162 else
19163 copysign_insn = gen_copysigntf3_var;
19165 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
19169 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
19170 be a constant, and so has already been expanded into a vector constant. */
19172 void
19173 ix86_split_copysign_const (rtx operands[])
19175 enum machine_mode mode, vmode;
19176 rtx dest, op0, mask, x;
19178 dest = operands[0];
19179 op0 = operands[1];
19180 mask = operands[3];
19182 mode = GET_MODE (dest);
19183 vmode = GET_MODE (mask);
19185 dest = simplify_gen_subreg (vmode, dest, mode, 0);
19186 x = gen_rtx_AND (vmode, dest, mask);
19187 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19189 if (op0 != CONST0_RTX (vmode))
19191 x = gen_rtx_IOR (vmode, dest, op0);
19192 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19196 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
19197 so we have to do two masks. */
19199 void
19200 ix86_split_copysign_var (rtx operands[])
19202 enum machine_mode mode, vmode;
19203 rtx dest, scratch, op0, op1, mask, nmask, x;
19205 dest = operands[0];
19206 scratch = operands[1];
19207 op0 = operands[2];
19208 op1 = operands[3];
19209 nmask = operands[4];
19210 mask = operands[5];
19212 mode = GET_MODE (dest);
19213 vmode = GET_MODE (mask);
19215 if (rtx_equal_p (op0, op1))
19217 /* Shouldn't happen often (it's useless, obviously), but when it does
19218 we'd generate incorrect code if we continued below. */
19219 emit_move_insn (dest, op0);
19220 return;
19223 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
19225 gcc_assert (REGNO (op1) == REGNO (scratch));
19227 x = gen_rtx_AND (vmode, scratch, mask);
19228 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19230 dest = mask;
19231 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19232 x = gen_rtx_NOT (vmode, dest);
19233 x = gen_rtx_AND (vmode, x, op0);
19234 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19236 else
19238 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
19240 x = gen_rtx_AND (vmode, scratch, mask);
19242 else /* alternative 2,4 */
19244 gcc_assert (REGNO (mask) == REGNO (scratch));
19245 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
19246 x = gen_rtx_AND (vmode, scratch, op1);
19248 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19250 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
19252 dest = simplify_gen_subreg (vmode, op0, mode, 0);
19253 x = gen_rtx_AND (vmode, dest, nmask);
19255 else /* alternative 3,4 */
19257 gcc_assert (REGNO (nmask) == REGNO (dest));
19258 dest = nmask;
19259 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19260 x = gen_rtx_AND (vmode, dest, op0);
19262 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19265 x = gen_rtx_IOR (vmode, dest, scratch);
19266 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19269 /* Return TRUE or FALSE depending on whether the first SET in INSN
19270 has source and destination with matching CC modes, and that the
19271 CC mode is at least as constrained as REQ_MODE. */
19273 bool
19274 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
19276 rtx set;
19277 enum machine_mode set_mode;
19279 set = PATTERN (insn);
19280 if (GET_CODE (set) == PARALLEL)
19281 set = XVECEXP (set, 0, 0);
19282 gcc_assert (GET_CODE (set) == SET);
19283 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
19285 set_mode = GET_MODE (SET_DEST (set));
19286 switch (set_mode)
19288 case CCNOmode:
19289 if (req_mode != CCNOmode
19290 && (req_mode != CCmode
19291 || XEXP (SET_SRC (set), 1) != const0_rtx))
19292 return false;
19293 break;
19294 case CCmode:
19295 if (req_mode == CCGCmode)
19296 return false;
19297 /* FALLTHRU */
19298 case CCGCmode:
19299 if (req_mode == CCGOCmode || req_mode == CCNOmode)
19300 return false;
19301 /* FALLTHRU */
19302 case CCGOCmode:
19303 if (req_mode == CCZmode)
19304 return false;
19305 /* FALLTHRU */
19306 case CCZmode:
19307 break;
19309 case CCAmode:
19310 case CCCmode:
19311 case CCOmode:
19312 case CCSmode:
19313 if (set_mode != req_mode)
19314 return false;
19315 break;
19317 default:
19318 gcc_unreachable ();
19321 return GET_MODE (SET_SRC (set)) == set_mode;
19324 /* Generate insn patterns to do an integer compare of OPERANDS. */
19326 static rtx
19327 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
19329 enum machine_mode cmpmode;
19330 rtx tmp, flags;
19332 cmpmode = SELECT_CC_MODE (code, op0, op1);
19333 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
19335 /* This is very simple, but making the interface the same as in the
19336 FP case makes the rest of the code easier. */
19337 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
19338 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
19340 /* Return the test that should be put into the flags user, i.e.
19341 the bcc, scc, or cmov instruction. */
19342 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
19345 /* Figure out whether to use ordered or unordered fp comparisons.
19346 Return the appropriate mode to use. */
19348 enum machine_mode
19349 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
19351 /* ??? In order to make all comparisons reversible, we do all comparisons
19352 non-trapping when compiling for IEEE. Once gcc is able to distinguish
19353 all forms of trapping and non-trapping comparisons, we can make inequality
19354 comparisons trapping again, since it results in better code when using
19355 FCOM based compares. */
19356 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
19359 enum machine_mode
19360 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
19362 enum machine_mode mode = GET_MODE (op0);
19364 if (SCALAR_FLOAT_MODE_P (mode))
19366 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
19367 return ix86_fp_compare_mode (code);
19370 switch (code)
19372 /* Only zero flag is needed. */
19373 case EQ: /* ZF=0 */
19374 case NE: /* ZF!=0 */
19375 return CCZmode;
19376 /* Codes needing carry flag. */
19377 case GEU: /* CF=0 */
19378 case LTU: /* CF=1 */
19379 /* Detect overflow checks. They need just the carry flag. */
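/* That is, a compare like (a + b) LTU a -- op0 is the PLUS, op1 its
   first operand -- is an unsigned overflow check; only the carry flag
   is needed, hence CCCmode.  */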
19380 if (GET_CODE (op0) == PLUS
19381 && rtx_equal_p (op1, XEXP (op0, 0)))
19382 return CCCmode;
19383 else
19384 return CCmode;
19385 case GTU: /* CF=0 & ZF=0 */
19386 case LEU: /* CF=1 | ZF=1 */
19387 return CCmode;
19388 /* Codes possibly doable only with sign flag when
19389 comparing against zero. */
19390 case GE: /* SF=OF or SF=0 */
19391 case LT: /* SF<>OF or SF=1 */
19392 if (op1 == const0_rtx)
19393 return CCGOCmode;
19394 else
19395 /* For other cases Carry flag is not required. */
19396 return CCGCmode;
19397 /* Codes doable only with the sign flag when comparing
19398 against zero, but we lack a jump instruction for that,
19399 so we need to use relational tests against overflow,
19400 which therefore needs to be zero. */
19401 case GT: /* ZF=0 & SF=OF */
19402 case LE: /* ZF=1 | SF<>OF */
19403 if (op1 == const0_rtx)
19404 return CCNOmode;
19405 else
19406 return CCGCmode;
19407 /* The strcmp pattern does (use flags), and combine may ask us for the
19408 proper mode. */
19409 case USE:
19410 return CCmode;
19411 default:
19412 gcc_unreachable ();
19416 /* Return the fixed registers used for condition codes. */
19418 static bool
19419 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
19421 *p1 = FLAGS_REG;
19422 *p2 = FPSR_REG;
19423 return true;
19426 /* If two condition code modes are compatible, return a condition code
19427 mode which is compatible with both. Otherwise, return
19428 VOIDmode. */
19430 static enum machine_mode
19431 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
19433 if (m1 == m2)
19434 return m1;
19436 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
19437 return VOIDmode;
19439 if ((m1 == CCGCmode && m2 == CCGOCmode)
19440 || (m1 == CCGOCmode && m2 == CCGCmode))
19441 return CCGCmode;
19443 if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
19444 return m2;
19445 else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
19446 return m1;
19448 switch (m1)
19450 default:
19451 gcc_unreachable ();
19453 case CCmode:
19454 case CCGCmode:
19455 case CCGOCmode:
19456 case CCNOmode:
19457 case CCAmode:
19458 case CCCmode:
19459 case CCOmode:
19460 case CCSmode:
19461 case CCZmode:
19462 switch (m2)
19464 default:
19465 return VOIDmode;
19467 case CCmode:
19468 case CCGCmode:
19469 case CCGOCmode:
19470 case CCNOmode:
19471 case CCAmode:
19472 case CCCmode:
19473 case CCOmode:
19474 case CCSmode:
19475 case CCZmode:
19476 return CCmode;
19479 case CCFPmode:
19480 case CCFPUmode:
19481 /* These are only compatible with themselves, which we already
19482 checked above. */
19483 return VOIDmode;
19488 /* Return a comparison we can do that is equivalent to
19489 swap_condition (code), except possibly for orderedness.
19490 Never change orderedness if TARGET_IEEE_FP; return
19491 UNKNOWN in that case if necessary. */
19493 static enum rtx_code
19494 ix86_fp_swap_condition (enum rtx_code code)
19496 switch (code)
19498 case GT: /* GTU - CF=0 & ZF=0 */
19499 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
19500 case GE: /* GEU - CF=0 */
19501 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
19502 case UNLT: /* LTU - CF=1 */
19503 return TARGET_IEEE_FP ? UNKNOWN : GT;
19504 case UNLE: /* LEU - CF=1 | ZF=1 */
19505 return TARGET_IEEE_FP ? UNKNOWN : GE;
19506 default:
19507 return swap_condition (code);
19511 /* Return the cost of comparison CODE using the best strategy for performance.
19512 All of the following functions use the number of instructions as the cost metric.
19513 In the future this should be tweaked to compute bytes for optimize_size and
19514 take into account the performance of various instructions on various CPUs. */
19516 static int
19517 ix86_fp_comparison_cost (enum rtx_code code)
19519 int arith_cost;
19521 /* The cost of code using bit-twiddling on %ah. */
19522 switch (code)
19524 case UNLE:
19525 case UNLT:
19526 case LTGT:
19527 case GT:
19528 case GE:
19529 case UNORDERED:
19530 case ORDERED:
19531 case UNEQ:
19532 arith_cost = 4;
19533 break;
19534 case LT:
19535 case NE:
19536 case EQ:
19537 case UNGE:
19538 arith_cost = TARGET_IEEE_FP ? 5 : 4;
19539 break;
19540 case LE:
19541 case UNGT:
19542 arith_cost = TARGET_IEEE_FP ? 6 : 4;
19543 break;
19544 default:
19545 gcc_unreachable ();
19548 switch (ix86_fp_comparison_strategy (code))
19550 case IX86_FPCMP_COMI:
19551 return arith_cost > 4 ? 3 : 2;
19552 case IX86_FPCMP_SAHF:
19553 return arith_cost > 4 ? 4 : 3;
19554 default:
19555 return arith_cost;
19559 /* Return the strategy to use for floating-point comparisons.  We assume that
19560 fcomi is always preferable where available, since that is also true when
19561 looking at size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
19563 enum ix86_fpcmp_strategy
19564 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
19566 /* Do fcomi/sahf based test when profitable. */
19568 if (TARGET_CMOVE)
19569 return IX86_FPCMP_COMI;
19571 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
19572 return IX86_FPCMP_SAHF;
19574 return IX86_FPCMP_ARITH;
19577 /* Swap, force into registers, or otherwise massage the two operands
19578 to a fp comparison. The operands are updated in place; the new
19579 comparison code is returned. */
19581 static enum rtx_code
19582 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
19584 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
19585 rtx op0 = *pop0, op1 = *pop1;
19586 enum machine_mode op_mode = GET_MODE (op0);
19587 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
19589 /* All of the unordered compare instructions only work on registers.
19590 The same is true of the fcomi compare instructions. The XFmode
19591 compare instructions require registers except when comparing
19592 against zero or when converting operand 1 from fixed point to
19593 floating point. */
19595 if (!is_sse
19596 && (fpcmp_mode == CCFPUmode
19597 || (op_mode == XFmode
19598 && ! (standard_80387_constant_p (op0) == 1
19599 || standard_80387_constant_p (op1) == 1)
19600 && GET_CODE (op1) != FLOAT)
19601 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
19603 op0 = force_reg (op_mode, op0);
19604 op1 = force_reg (op_mode, op1);
19606 else
19608 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
19609 things around if they appear profitable, otherwise force op0
19610 into a register. */
19612 if (standard_80387_constant_p (op0) == 0
19613 || (MEM_P (op0)
19614 && ! (standard_80387_constant_p (op1) == 0
19615 || MEM_P (op1))))
19617 enum rtx_code new_code = ix86_fp_swap_condition (code);
19618 if (new_code != UNKNOWN)
19620 rtx tmp;
19621 tmp = op0, op0 = op1, op1 = tmp;
19622 code = new_code;
19626 if (!REG_P (op0))
19627 op0 = force_reg (op_mode, op0);
19629 if (CONSTANT_P (op1))
19631 int tmp = standard_80387_constant_p (op1);
19632 if (tmp == 0)
19633 op1 = validize_mem (force_const_mem (op_mode, op1));
19634 else if (tmp == 1)
19636 if (TARGET_CMOVE)
19637 op1 = force_reg (op_mode, op1);
19639 else
19640 op1 = force_reg (op_mode, op1);
19644 /* Try to rearrange the comparison to make it cheaper. */
19645 if (ix86_fp_comparison_cost (code)
19646 > ix86_fp_comparison_cost (swap_condition (code))
19647 && (REG_P (op1) || can_create_pseudo_p ()))
19649 rtx tmp;
19650 tmp = op0, op0 = op1, op1 = tmp;
19651 code = swap_condition (code);
19652 if (!REG_P (op0))
19653 op0 = force_reg (op_mode, op0);
19656 *pop0 = op0;
19657 *pop1 = op1;
19658 return code;
19661 /* Convert the comparison codes we use to represent an FP comparison to the
19662 integer code that will result in a proper branch.  Return UNKNOWN if no
19663 such code is available. */
19665 enum rtx_code
19666 ix86_fp_compare_code_to_integer (enum rtx_code code)
19668 switch (code)
19670 case GT:
19671 return GTU;
19672 case GE:
19673 return GEU;
19674 case ORDERED:
19675 case UNORDERED:
19676 return code;
19677 break;
19678 case UNEQ:
19679 return EQ;
19680 break;
19681 case UNLT:
19682 return LTU;
19683 break;
19684 case UNLE:
19685 return LEU;
19686 break;
19687 case LTGT:
19688 return NE;
19689 break;
19690 default:
19691 return UNKNOWN;
19695 /* Generate insn patterns to do a floating point compare of OPERANDS. */
19697 static rtx
19698 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
19700 enum machine_mode fpcmp_mode, intcmp_mode;
19701 rtx tmp, tmp2;
19703 fpcmp_mode = ix86_fp_compare_mode (code);
19704 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
19706 /* Do fcomi/sahf based test when profitable. */
19707 switch (ix86_fp_comparison_strategy (code))
19709 case IX86_FPCMP_COMI:
19710 intcmp_mode = fpcmp_mode;
19711 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19712 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
19713 tmp);
19714 emit_insn (tmp);
19715 break;
19717 case IX86_FPCMP_SAHF:
19718 intcmp_mode = fpcmp_mode;
19719 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19720 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
19721 tmp);
19723 if (!scratch)
19724 scratch = gen_reg_rtx (HImode);
19725 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
19726 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
19727 break;
19729 case IX86_FPCMP_ARITH:
19730 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
19731 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19732 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
19733 if (!scratch)
19734 scratch = gen_reg_rtx (HImode);
19735 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
19737 /* In the unordered case, we have to check C2 for NaNs, which
19738 doesn't happen to work out to anything nice combination-wise.
19739 So do some bit twiddling on the value we've got in AH to come
19740 up with an appropriate set of condition codes. */
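/* For reference: after fnstsw the FPU condition codes end up in AH as
   C0 = 0x01, C2 = 0x04 and C3 = 0x40, so the masks used below test
   combinations of those bits -- e.g. 0x45 = C3|C2|C0 and 0x44 = C3|C2.
   (C1, bit 0x02, is not needed here.)  */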
19742 intcmp_mode = CCNOmode;
19743 switch (code)
19745 case GT:
19746 case UNGT:
19747 if (code == GT || !TARGET_IEEE_FP)
19749 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
19750 code = EQ;
19752 else
19754 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19755 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
19756 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
19757 intcmp_mode = CCmode;
19758 code = GEU;
19760 break;
19761 case LT:
19762 case UNLT:
19763 if (code == LT && TARGET_IEEE_FP)
19765 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19766 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
19767 intcmp_mode = CCmode;
19768 code = EQ;
19770 else
19772 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
19773 code = NE;
19775 break;
19776 case GE:
19777 case UNGE:
19778 if (code == GE || !TARGET_IEEE_FP)
19780 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
19781 code = EQ;
19783 else
19785 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19786 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
19787 code = NE;
19789 break;
19790 case LE:
19791 case UNLE:
19792 if (code == LE && TARGET_IEEE_FP)
19794 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19795 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
19796 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
19797 intcmp_mode = CCmode;
19798 code = LTU;
19800 else
19802 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
19803 code = NE;
19805 break;
19806 case EQ:
19807 case UNEQ:
19808 if (code == EQ && TARGET_IEEE_FP)
19810 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19811 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
19812 intcmp_mode = CCmode;
19813 code = EQ;
19815 else
19817 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
19818 code = NE;
19820 break;
19821 case NE:
19822 case LTGT:
19823 if (code == NE && TARGET_IEEE_FP)
19825 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19826 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
19827 GEN_INT (0x40)));
19828 code = NE;
19830 else
19832 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
19833 code = EQ;
19835 break;
19837 case UNORDERED:
19838 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
19839 code = NE;
19840 break;
19841 case ORDERED:
19842 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
19843 code = EQ;
19844 break;
19846 default:
19847 gcc_unreachable ();
19849 break;
19851 default:
19852 gcc_unreachable();
19855 /* Return the test that should be put into the flags user, i.e.
19856 the bcc, scc, or cmov instruction. */
19857 return gen_rtx_fmt_ee (code, VOIDmode,
19858 gen_rtx_REG (intcmp_mode, FLAGS_REG),
19859 const0_rtx);
19862 static rtx
19863 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
19865 rtx ret;
19867 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
19868 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
19870 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
19872 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
19873 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
19875 else
19876 ret = ix86_expand_int_compare (code, op0, op1);
19878 return ret;
19881 void
19882 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
19884 enum machine_mode mode = GET_MODE (op0);
19885 rtx tmp;
19887 switch (mode)
19889 case SFmode:
19890 case DFmode:
19891 case XFmode:
19892 case QImode:
19893 case HImode:
19894 case SImode:
19895 simple:
19896 tmp = ix86_expand_compare (code, op0, op1);
19897 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
19898 gen_rtx_LABEL_REF (VOIDmode, label),
19899 pc_rtx);
19900 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
19901 return;
19903 case DImode:
19904 if (TARGET_64BIT)
19905 goto simple;
19906 case TImode:
19907 /* Expand DImode branch into multiple compare+branch. */
19909 rtx lo[2], hi[2], label2;
19910 enum rtx_code code1, code2, code3;
19911 enum machine_mode submode;
19913 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
19915 tmp = op0, op0 = op1, op1 = tmp;
19916 code = swap_condition (code);
19919 split_double_mode (mode, &op0, 1, lo+0, hi+0);
19920 split_double_mode (mode, &op1, 1, lo+1, hi+1);
19922 submode = mode == DImode ? SImode : DImode;
19924 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
19925 avoid two branches. This costs one extra insn, so disable when
19926 optimizing for size. */
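/* In C-like terms, for 64-bit A and B on a 32-bit target the EQ case
   below turns

     if (a == b) goto label;

   into

     if (((hi(a) ^ hi(b)) | (lo(a) ^ lo(b))) == 0) goto label;

   so a single conditional branch suffices (NE branches on nonzero).  */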
19928 if ((code == EQ || code == NE)
19929 && (!optimize_insn_for_size_p ()
19930 || hi[1] == const0_rtx || lo[1] == const0_rtx))
19932 rtx xor0, xor1;
19934 xor1 = hi[0];
19935 if (hi[1] != const0_rtx)
19936 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
19937 NULL_RTX, 0, OPTAB_WIDEN);
19939 xor0 = lo[0];
19940 if (lo[1] != const0_rtx)
19941 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
19942 NULL_RTX, 0, OPTAB_WIDEN);
19944 tmp = expand_binop (submode, ior_optab, xor1, xor0,
19945 NULL_RTX, 0, OPTAB_WIDEN);
19947 ix86_expand_branch (code, tmp, const0_rtx, label);
19948 return;
19951 /* Otherwise, if we are doing a less-than or greater-or-equal comparison,
19952 op1 is a constant, and the low word is zero, then we can just
19953 examine the high word.  Similarly for a low word of -1 and a
19954 less-or-equal or greater-than comparison. */
19956 if (CONST_INT_P (hi[1]))
19957 switch (code)
19959 case LT: case LTU: case GE: case GEU:
19960 if (lo[1] == const0_rtx)
19962 ix86_expand_branch (code, hi[0], hi[1], label);
19963 return;
19965 break;
19966 case LE: case LEU: case GT: case GTU:
19967 if (lo[1] == constm1_rtx)
19969 ix86_expand_branch (code, hi[0], hi[1], label);
19970 return;
19972 break;
19973 default:
19974 break;
19977 /* Otherwise, we need two or three jumps. */
19979 label2 = gen_label_rtx ();
19981 code1 = code;
19982 code2 = swap_condition (code);
19983 code3 = unsigned_condition (code);
19985 switch (code)
19987 case LT: case GT: case LTU: case GTU:
19988 break;
19990 case LE: code1 = LT; code2 = GT; break;
19991 case GE: code1 = GT; code2 = LT; break;
19992 case LEU: code1 = LTU; code2 = GTU; break;
19993 case GEU: code1 = GTU; code2 = LTU; break;
19995 case EQ: code1 = UNKNOWN; code2 = NE; break;
19996 case NE: code2 = UNKNOWN; break;
19998 default:
19999 gcc_unreachable ();
20003 * a < b =>
20004 * if (hi(a) < hi(b)) goto true;
20005 * if (hi(a) > hi(b)) goto false;
20006 * if (lo(a) < lo(b)) goto true;
20007 * false:
20010 if (code1 != UNKNOWN)
20011 ix86_expand_branch (code1, hi[0], hi[1], label);
20012 if (code2 != UNKNOWN)
20013 ix86_expand_branch (code2, hi[0], hi[1], label2);
20015 ix86_expand_branch (code3, lo[0], lo[1], label);
20017 if (code2 != UNKNOWN)
20018 emit_label (label2);
20019 return;
20022 default:
20023 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
20024 goto simple;
20028 /* Split branch based on floating point condition. */
20029 void
20030 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
20031 rtx target1, rtx target2, rtx tmp)
20033 rtx condition;
20034 rtx i;
20036 if (target2 != pc_rtx)
20038 rtx tmp = target2;
20039 code = reverse_condition_maybe_unordered (code);
20040 target2 = target1;
20041 target1 = tmp;
20044 condition = ix86_expand_fp_compare (code, op1, op2,
20045 tmp);
20047 i = emit_jump_insn (gen_rtx_SET
20048 (VOIDmode, pc_rtx,
20049 gen_rtx_IF_THEN_ELSE (VOIDmode,
20050 condition, target1, target2)));
20051 if (split_branch_probability >= 0)
20052 add_int_reg_note (i, REG_BR_PROB, split_branch_probability);
20055 void
20056 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
20058 rtx ret;
20060 gcc_assert (GET_MODE (dest) == QImode);
20062 ret = ix86_expand_compare (code, op0, op1);
20063 PUT_MODE (ret, QImode);
20064 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
20067 /* Expand a comparison setting or clearing the carry flag.  Return true when
20068 successful and set *POP for the operation. */
20069 static bool
20070 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
20072 enum machine_mode mode =
20073 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
20075 /* Do not handle double-mode compares that go through special path. */
20076 if (mode == (TARGET_64BIT ? TImode : DImode))
20077 return false;
20079 if (SCALAR_FLOAT_MODE_P (mode))
20081 rtx compare_op, compare_seq;
20083 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
20085 /* Shortcut:  the following common codes never translate
20086 into carry flag compares. */
20087 if (code == EQ || code == NE || code == UNEQ || code == LTGT
20088 || code == ORDERED || code == UNORDERED)
20089 return false;
20091 /* These comparisons require the zero flag; swap the operands so they won't. */
20092 if ((code == GT || code == UNLE || code == LE || code == UNGT)
20093 && !TARGET_IEEE_FP)
20095 rtx tmp = op0;
20096 op0 = op1;
20097 op1 = tmp;
20098 code = swap_condition (code);
20101 /* Try to expand the comparison and verify that we end up with
20102 a carry-flag-based comparison.  This fails only when we decide
20103 to expand the comparison using arithmetic, which is not a
20104 common scenario. */
20105 start_sequence ();
20106 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20107 compare_seq = get_insns ();
20108 end_sequence ();
20110 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
20111 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
20112 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
20113 else
20114 code = GET_CODE (compare_op);
20116 if (code != LTU && code != GEU)
20117 return false;
20119 emit_insn (compare_seq);
20120 *pop = compare_op;
20121 return true;
20124 if (!INTEGRAL_MODE_P (mode))
20125 return false;
20127 switch (code)
20129 case LTU:
20130 case GEU:
20131 break;
20133 /* Convert a==0 into (unsigned)a<1. */
20134 case EQ:
20135 case NE:
20136 if (op1 != const0_rtx)
20137 return false;
20138 op1 = const1_rtx;
20139 code = (code == EQ ? LTU : GEU);
20140 break;
20142 /* Convert a>b into b<a or a>=b-1. */
20143 case GTU:
20144 case LEU:
20145 if (CONST_INT_P (op1))
20147 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
20148 /* Bail out on overflow.  We could still swap the operands, but that
20149 would force loading the constant into a register. */
20150 if (op1 == const0_rtx
20151 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
20152 return false;
20153 code = (code == GTU ? GEU : LTU);
20155 else
20157 rtx tmp = op1;
20158 op1 = op0;
20159 op0 = tmp;
20160 code = (code == GTU ? LTU : GEU);
20162 break;
20164 /* Convert a>=0 into (unsigned)a<0x80000000. */
20165 case LT:
20166 case GE:
20167 if (mode == DImode || op1 != const0_rtx)
20168 return false;
20169 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20170 code = (code == LT ? GEU : LTU);
20171 break;
20172 case LE:
20173 case GT:
20174 if (mode == DImode || op1 != constm1_rtx)
20175 return false;
20176 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20177 code = (code == LE ? GEU : LTU);
20178 break;
20180 default:
20181 return false;
20183 /* Swapping operands may cause a constant to appear as the first operand. */
20184 if (!nonimmediate_operand (op0, VOIDmode))
20186 if (!can_create_pseudo_p ())
20187 return false;
20188 op0 = force_reg (mode, op0);
20190 *pop = ix86_expand_compare (code, op0, op1);
20191 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
20192 return true;
20195 bool
20196 ix86_expand_int_movcc (rtx operands[])
20198 enum rtx_code code = GET_CODE (operands[1]), compare_code;
20199 rtx compare_seq, compare_op;
20200 enum machine_mode mode = GET_MODE (operands[0]);
20201 bool sign_bit_compare_p = false;
20202 rtx op0 = XEXP (operands[1], 0);
20203 rtx op1 = XEXP (operands[1], 1);
20205 if (GET_MODE (op0) == TImode
20206 || (GET_MODE (op0) == DImode
20207 && !TARGET_64BIT))
20208 return false;
20210 start_sequence ();
20211 compare_op = ix86_expand_compare (code, op0, op1);
20212 compare_seq = get_insns ();
20213 end_sequence ();
20215 compare_code = GET_CODE (compare_op);
20217 if ((op1 == const0_rtx && (code == GE || code == LT))
20218 || (op1 == constm1_rtx && (code == GT || code == LE)))
20219 sign_bit_compare_p = true;
20221 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
20222 HImode insns, we'd be swallowed in word prefix ops. */
20224 if ((mode != HImode || TARGET_FAST_PREFIX)
20225 && (mode != (TARGET_64BIT ? TImode : DImode))
20226 && CONST_INT_P (operands[2])
20227 && CONST_INT_P (operands[3]))
20229 rtx out = operands[0];
20230 HOST_WIDE_INT ct = INTVAL (operands[2]);
20231 HOST_WIDE_INT cf = INTVAL (operands[3]);
20232 HOST_WIDE_INT diff;
20234 diff = ct - cf;
20235 /* Sign bit compares are better done using shifts than by using
20236 sbb. */
20237 if (sign_bit_compare_p
20238 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
20240 /* Detect overlap between destination and compare sources. */
20241 rtx tmp = out;
20243 if (!sign_bit_compare_p)
20245 rtx flags;
20246 bool fpcmp = false;
20248 compare_code = GET_CODE (compare_op);
20250 flags = XEXP (compare_op, 0);
20252 if (GET_MODE (flags) == CCFPmode
20253 || GET_MODE (flags) == CCFPUmode)
20255 fpcmp = true;
20256 compare_code
20257 = ix86_fp_compare_code_to_integer (compare_code);
20260 /* To simplify rest of code, restrict to the GEU case. */
20261 if (compare_code == LTU)
20263 HOST_WIDE_INT tmp = ct;
20264 ct = cf;
20265 cf = tmp;
20266 compare_code = reverse_condition (compare_code);
20267 code = reverse_condition (code);
20269 else
20271 if (fpcmp)
20272 PUT_CODE (compare_op,
20273 reverse_condition_maybe_unordered
20274 (GET_CODE (compare_op)));
20275 else
20276 PUT_CODE (compare_op,
20277 reverse_condition (GET_CODE (compare_op)));
20279 diff = ct - cf;
20281 if (reg_overlap_mentioned_p (out, op0)
20282 || reg_overlap_mentioned_p (out, op1))
20283 tmp = gen_reg_rtx (mode);
20285 if (mode == DImode)
20286 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
20287 else
20288 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
20289 flags, compare_op));
20291 else
20293 if (code == GT || code == GE)
20294 code = reverse_condition (code);
20295 else
20297 HOST_WIDE_INT tmp = ct;
20298 ct = cf;
20299 cf = tmp;
20300 diff = ct - cf;
20302 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
20305 if (diff == 1)
20308 * cmpl op0,op1
20309 * sbbl dest,dest
20310 * [addl dest, ct]
20312 * Size 5 - 8.
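/* A note on the idiom: "sbbl dest,dest" computes dest - dest - CF,
   i.e. dest becomes -1 when the preceding compare set the carry flag
   and 0 otherwise, so it materializes the comparison result as an
   all-ones/all-zeros mask without a branch.  The optional add then
   turns that -1/0 mask into cf/ct.  For example (illustrative
   constants), ct = 5 and cf = 4 give diff == 1 and the sequence
   cmp; sbb; add $5, producing 4 or 5.  */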
20314 if (ct)
20315 tmp = expand_simple_binop (mode, PLUS,
20316 tmp, GEN_INT (ct),
20317 copy_rtx (tmp), 1, OPTAB_DIRECT);
20319 else if (cf == -1)
20322 * cmpl op0,op1
20323 * sbbl dest,dest
20324 * orl $ct, dest
20326 * Size 8.
20328 tmp = expand_simple_binop (mode, IOR,
20329 tmp, GEN_INT (ct),
20330 copy_rtx (tmp), 1, OPTAB_DIRECT);
20332 else if (diff == -1 && ct)
20335 * cmpl op0,op1
20336 * sbbl dest,dest
20337 * notl dest
20338 * [addl dest, cf]
20340 * Size 8 - 11.
20342 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20343 if (cf)
20344 tmp = expand_simple_binop (mode, PLUS,
20345 copy_rtx (tmp), GEN_INT (cf),
20346 copy_rtx (tmp), 1, OPTAB_DIRECT);
20348 else
20351 * cmpl op0,op1
20352 * sbbl dest,dest
20353 * [notl dest]
20354 * andl cf - ct, dest
20355 * [addl dest, ct]
20357 * Size 8 - 11.
20360 if (cf == 0)
20362 cf = ct;
20363 ct = 0;
20364 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20367 tmp = expand_simple_binop (mode, AND,
20368 copy_rtx (tmp),
20369 gen_int_mode (cf - ct, mode),
20370 copy_rtx (tmp), 1, OPTAB_DIRECT);
20371 if (ct)
20372 tmp = expand_simple_binop (mode, PLUS,
20373 copy_rtx (tmp), GEN_INT (ct),
20374 copy_rtx (tmp), 1, OPTAB_DIRECT);
20377 if (!rtx_equal_p (tmp, out))
20378 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
20380 return true;
20383 if (diff < 0)
20385 enum machine_mode cmp_mode = GET_MODE (op0);
20387 HOST_WIDE_INT tmp;
20388 tmp = ct, ct = cf, cf = tmp;
20389 diff = -diff;
20391 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20393 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20395 /* We may be reversing an unordered compare to a normal compare, which
20396 is not valid in general (we may convert a non-trapping condition
20397 to a trapping one); however, on i386 we currently emit all
20398 comparisons unordered. */
20399 compare_code = reverse_condition_maybe_unordered (compare_code);
20400 code = reverse_condition_maybe_unordered (code);
20402 else
20404 compare_code = reverse_condition (compare_code);
20405 code = reverse_condition (code);
20409 compare_code = UNKNOWN;
20410 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
20411 && CONST_INT_P (op1))
20413 if (op1 == const0_rtx
20414 && (code == LT || code == GE))
20415 compare_code = code;
20416 else if (op1 == constm1_rtx)
20418 if (code == LE)
20419 compare_code = LT;
20420 else if (code == GT)
20421 compare_code = GE;
20425 /* Optimize dest = (op0 < 0) ? -1 : cf. */
20426 if (compare_code != UNKNOWN
20427 && GET_MODE (op0) == GET_MODE (out)
20428 && (cf == -1 || ct == -1))
20430 /* If the lea code below could be used, only optimize
20431 if it results in a 2-insn sequence. */
20433 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
20434 || diff == 3 || diff == 5 || diff == 9)
20435 || (compare_code == LT && ct == -1)
20436 || (compare_code == GE && cf == -1))
20439 * notl op1 (if necessary)
20440 * sarl $31, op1
20441 * orl cf, op1
20443 if (ct != -1)
20445 cf = ct;
20446 ct = -1;
20447 code = reverse_condition (code);
20450 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
20452 out = expand_simple_binop (mode, IOR,
20453 out, GEN_INT (cf),
20454 out, 1, OPTAB_DIRECT);
20455 if (out != operands[0])
20456 emit_move_insn (operands[0], out);
20458 return true;
20463 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
20464 || diff == 3 || diff == 5 || diff == 9)
20465 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
20466 && (mode != DImode
20467 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
20470 * xorl dest,dest
20471 * cmpl op1,op2
20472 * setcc dest
20473 * lea cf(dest*(ct-cf)),dest
20475 * Size 14.
20477 * This also catches the degenerate setcc-only case.
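/* For example (illustrative constants): ct = 9, cf = 1 gives
   diff == 8, so after "setcc dest" (dest is 0 or 1) the result is
   formed as lea 1(,dest,8),dest -- i.e. dest * 8 + 1, which is 1 or 9
   without any branch.  */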
20480 rtx tmp;
20481 int nops;
20483 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
20485 nops = 0;
20486 /* On x86_64 the lea instruction operates on Pmode, so we need
20487 to get the arithmetic done in the proper mode to match. */
20488 if (diff == 1)
20489 tmp = copy_rtx (out);
20490 else
20492 rtx out1;
20493 out1 = copy_rtx (out);
20494 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
20495 nops++;
20496 if (diff & 1)
20498 tmp = gen_rtx_PLUS (mode, tmp, out1);
20499 nops++;
20502 if (cf != 0)
20504 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
20505 nops++;
20507 if (!rtx_equal_p (tmp, out))
20509 if (nops == 1)
20510 out = force_operand (tmp, copy_rtx (out));
20511 else
20512 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
20514 if (!rtx_equal_p (out, operands[0]))
20515 emit_move_insn (operands[0], copy_rtx (out));
20517 return true;
20521 * General case: Jumpful:
20522 * xorl dest,dest cmpl op1, op2
20523 * cmpl op1, op2 movl ct, dest
20524 * setcc dest jcc 1f
20525 * decl dest movl cf, dest
20526 * andl (cf-ct),dest 1:
20527 * addl ct,dest
20529 * Size 20. Size 14.
20531 * This is reasonably steep, but branch mispredict costs are
20532 * high on modern cpus, so consider failing only if optimizing
20533 * for space.
20536 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
20537 && BRANCH_COST (optimize_insn_for_speed_p (),
20538 false) >= 2)
20540 if (cf == 0)
20542 enum machine_mode cmp_mode = GET_MODE (op0);
20544 cf = ct;
20545 ct = 0;
20547 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20549 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20551 /* We may be reversing an unordered compare to a normal compare,
20552 which is not valid in general (we may convert a non-trapping
20553 condition to a trapping one); however, on i386 we currently
20554 emit all comparisons unordered. */
20555 code = reverse_condition_maybe_unordered (code);
20557 else
20559 code = reverse_condition (code);
20560 if (compare_code != UNKNOWN)
20561 compare_code = reverse_condition (compare_code);
20565 if (compare_code != UNKNOWN)
20567 /* notl op1 (if needed)
20568 sarl $31, op1
20569 andl (cf-ct), op1
20570 addl ct, op1
20572 For x < 0 (resp. x <= -1) there will be no notl,
20573 so if possible swap the constants to get rid of the
20574 complement.
20575 True/false will be -1/0 while code below (store flag
20576 followed by decrement) is 0/-1, so the constants need
20577 to be exchanged once more. */
20579 if (compare_code == GE || !cf)
20581 code = reverse_condition (code);
20582 compare_code = LT;
20584 else
20586 HOST_WIDE_INT tmp = cf;
20587 cf = ct;
20588 ct = tmp;
20591 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
20593 else
20595 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
20597 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
20598 constm1_rtx,
20599 copy_rtx (out), 1, OPTAB_DIRECT);
20602 out = expand_simple_binop (mode, AND, copy_rtx (out),
20603 gen_int_mode (cf - ct, mode),
20604 copy_rtx (out), 1, OPTAB_DIRECT);
20605 if (ct)
20606 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
20607 copy_rtx (out), 1, OPTAB_DIRECT);
20608 if (!rtx_equal_p (out, operands[0]))
20609 emit_move_insn (operands[0], copy_rtx (out));
20611 return true;
20615 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
20617 /* Try a few things more with specific constants and a variable. */
20619 optab op;
20620 rtx var, orig_out, out, tmp;
20622 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
20623 return false;
20625 /* If one of the two operands is an interesting constant, load a
20626 constant with the above and mask it in with a logical operation. */
20628 if (CONST_INT_P (operands[2]))
20630 var = operands[3];
20631 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
20632 operands[3] = constm1_rtx, op = and_optab;
20633 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
20634 operands[3] = const0_rtx, op = ior_optab;
20635 else
20636 return false;
20638 else if (CONST_INT_P (operands[3]))
20640 var = operands[2];
20641 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
20642 operands[2] = constm1_rtx, op = and_optab;
20643 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
20644 operands[2] = const0_rtx, op = ior_optab;
20645 else
20646 return false;
20648 else
20649 return false;
20651 orig_out = operands[0];
20652 tmp = gen_reg_rtx (mode);
20653 operands[0] = tmp;
20655 /* Recurse to get the constant loaded. */
20656 if (ix86_expand_int_movcc (operands) == 0)
20657 return false;
20659 /* Mask in the interesting variable. */
20660 out = expand_binop (mode, op, var, tmp, orig_out, 0,
20661 OPTAB_WIDEN);
20662 if (!rtx_equal_p (out, orig_out))
20663 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
20665 return true;
20669 * For comparison with above,
20671 * movl cf,dest
20672 * movl ct,tmp
20673 * cmpl op1,op2
20674 * cmovcc tmp,dest
20676 * Size 15.
20679 if (! nonimmediate_operand (operands[2], mode))
20680 operands[2] = force_reg (mode, operands[2]);
20681 if (! nonimmediate_operand (operands[3], mode))
20682 operands[3] = force_reg (mode, operands[3]);
20684 if (! register_operand (operands[2], VOIDmode)
20685 && (mode == QImode
20686 || ! register_operand (operands[3], VOIDmode)))
20687 operands[2] = force_reg (mode, operands[2]);
20689 if (mode == QImode
20690 && ! register_operand (operands[3], VOIDmode))
20691 operands[3] = force_reg (mode, operands[3]);
20693 emit_insn (compare_seq);
20694 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
20695 gen_rtx_IF_THEN_ELSE (mode,
20696 compare_op, operands[2],
20697 operands[3])));
20698 return true;
20701 /* Swap, force into registers, or otherwise massage the two operands
20702 to an sse comparison with a mask result. Thus we differ a bit from
20703 ix86_prepare_fp_compare_args which expects to produce a flags result.
20705 The DEST operand exists to help determine whether to commute commutative
20706 operators. The POP0/POP1 operands are updated in place. The new
20707 comparison code is returned, or UNKNOWN if not implementable. */
20709 static enum rtx_code
20710 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
20711 rtx *pop0, rtx *pop1)
20713 rtx tmp;
20715 switch (code)
20717 case LTGT:
20718 case UNEQ:
20719 /* AVX supports all the needed comparisons. */
20720 if (TARGET_AVX)
20721 break;
20722 /* We have no LTGT as an operator. We could implement it with
20723 NE & ORDERED, but this requires an extra temporary. It's
20724 not clear that it's worth it. */
20725 return UNKNOWN;
20727 case LT:
20728 case LE:
20729 case UNGT:
20730 case UNGE:
20731 /* These are supported directly. */
20732 break;
20734 case EQ:
20735 case NE:
20736 case UNORDERED:
20737 case ORDERED:
20738 /* AVX has 3 operand comparisons, no need to swap anything. */
20739 if (TARGET_AVX)
20740 break;
20741 /* For commutative operators, try to canonicalize the destination
20742 operand to be first in the comparison - this helps reload to
20743 avoid extra moves. */
20744 if (!dest || !rtx_equal_p (dest, *pop1))
20745 break;
20746 /* FALLTHRU */
20748 case GE:
20749 case GT:
20750 case UNLE:
20751 case UNLT:
20752 /* These are not supported directly before AVX, and furthermore
20753 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
20754 comparison operands to transform into something that is
20755 supported. */
20756 tmp = *pop0;
20757 *pop0 = *pop1;
20758 *pop1 = tmp;
20759 code = swap_condition (code);
20760 break;
20762 default:
20763 gcc_unreachable ();
20766 return code;
20769 /* Detect conditional moves that exactly match min/max operational
20770 semantics. Note that this is IEEE safe, as long as we don't
20771 interchange the operands.
20773 Returns FALSE if this conditional move doesn't match a MIN/MAX,
20774 and TRUE if the operation is successful and instructions are emitted. */
20776 static bool
20777 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
20778 rtx cmp_op1, rtx if_true, rtx if_false)
20780 enum machine_mode mode;
20781 bool is_min;
20782 rtx tmp;
20784 if (code == LT)
20786 else if (code == UNGE)
20788 tmp = if_true;
20789 if_true = if_false;
20790 if_false = tmp;
20792 else
20793 return false;
20795 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
20796 is_min = true;
20797 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
20798 is_min = false;
20799 else
20800 return false;
20802 mode = GET_MODE (dest);
20804 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
20805 but MODE may be a vector mode and thus not appropriate. */
20806 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
20808 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
20809 rtvec v;
20811 if_true = force_reg (mode, if_true);
20812 v = gen_rtvec (2, if_true, if_false);
20813 tmp = gen_rtx_UNSPEC (mode, v, u);
20815 else
20817 code = is_min ? SMIN : SMAX;
20818 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
20821 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
20822 return true;
20825 /* Expand an sse vector comparison. Return the register with the result. */
20827 static rtx
20828 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
20829 rtx op_true, rtx op_false)
20831 enum machine_mode mode = GET_MODE (dest);
20832 enum machine_mode cmp_ops_mode = GET_MODE (cmp_op0);
20834 /* In the general case the result of a comparison can differ from the operands' type. */
20835 enum machine_mode cmp_mode;
20837 /* In AVX512F the result of comparison is an integer mask. */
20838 bool maskcmp = false;
20839 rtx x;
20841 if (GET_MODE_SIZE (cmp_ops_mode) == 64)
20843 cmp_mode = mode_for_size (GET_MODE_NUNITS (cmp_ops_mode), MODE_INT, 0);
20844 gcc_assert (cmp_mode != BLKmode);
20846 maskcmp = true;
20848 else
20849 cmp_mode = cmp_ops_mode;
20852 cmp_op0 = force_reg (cmp_ops_mode, cmp_op0);
20853 if (!nonimmediate_operand (cmp_op1, cmp_ops_mode))
20854 cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
20856 if (optimize
20857 || reg_overlap_mentioned_p (dest, op_true)
20858 || reg_overlap_mentioned_p (dest, op_false))
20859 dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
20861 /* Compare patterns for int modes are unspec in AVX512F only. */
20862 if (maskcmp && (code == GT || code == EQ))
20864 rtx (*gen)(rtx, rtx, rtx);
20866 switch (cmp_ops_mode)
20868 case V16SImode:
20869 gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
20870 break;
20871 case V8DImode:
20872 gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
20873 break;
20874 default:
20875 gen = NULL;
20878 if (gen)
20880 emit_insn (gen (dest, cmp_op0, cmp_op1));
20881 return dest;
20884 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
20886 if (cmp_mode != mode && !maskcmp)
20888 x = force_reg (cmp_ops_mode, x);
20889 convert_move (dest, x, false);
20891 else
20892 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20894 return dest;
20897 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
20898 operations. This is used for both scalar and vector conditional moves. */
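/* When no blend instruction applies and neither arm is a special
   constant, the fallback at the end of this function is the classic
   bitwise select; roughly:

     t2   = cmp & op_true;
     t3   = ~cmp & op_false;
     dest = t2 | t3;

   which relies on each element of CMP being either all-ones or
   all-zeros.  */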
20900 static void
20901 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
20903 enum machine_mode mode = GET_MODE (dest);
20904 enum machine_mode cmpmode = GET_MODE (cmp);
20906 /* In AVX512F the result of comparison is an integer mask. */
20907 bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
20909 rtx t2, t3, x;
20911 if (vector_all_ones_operand (op_true, mode)
20912 && rtx_equal_p (op_false, CONST0_RTX (mode))
20913 && !maskcmp)
20915 emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
20917 else if (op_false == CONST0_RTX (mode)
20918 && !maskcmp)
20920 op_true = force_reg (mode, op_true);
20921 x = gen_rtx_AND (mode, cmp, op_true);
20922 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20924 else if (op_true == CONST0_RTX (mode)
20925 && !maskcmp)
20927 op_false = force_reg (mode, op_false);
20928 x = gen_rtx_NOT (mode, cmp);
20929 x = gen_rtx_AND (mode, x, op_false);
20930 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20932 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)
20933 && !maskcmp)
20935 op_false = force_reg (mode, op_false);
20936 x = gen_rtx_IOR (mode, cmp, op_false);
20937 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20939 else if (TARGET_XOP
20940 && !maskcmp)
20942 op_true = force_reg (mode, op_true);
20944 if (!nonimmediate_operand (op_false, mode))
20945 op_false = force_reg (mode, op_false);
20947 emit_insn (gen_rtx_SET (mode, dest,
20948 gen_rtx_IF_THEN_ELSE (mode, cmp,
20949 op_true,
20950 op_false)));
20952 else
20954 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
20955 rtx d = dest;
20957 if (!nonimmediate_operand (op_true, mode))
20958 op_true = force_reg (mode, op_true);
20960 op_false = force_reg (mode, op_false);
20962 switch (mode)
20964 case V4SFmode:
20965 if (TARGET_SSE4_1)
20966 gen = gen_sse4_1_blendvps;
20967 break;
20968 case V2DFmode:
20969 if (TARGET_SSE4_1)
20970 gen = gen_sse4_1_blendvpd;
20971 break;
20972 case V16QImode:
20973 case V8HImode:
20974 case V4SImode:
20975 case V2DImode:
20976 if (TARGET_SSE4_1)
20978 gen = gen_sse4_1_pblendvb;
20979 if (mode != V16QImode)
20980 d = gen_reg_rtx (V16QImode);
20981 op_false = gen_lowpart (V16QImode, op_false);
20982 op_true = gen_lowpart (V16QImode, op_true);
20983 cmp = gen_lowpart (V16QImode, cmp);
20985 break;
20986 case V8SFmode:
20987 if (TARGET_AVX)
20988 gen = gen_avx_blendvps256;
20989 break;
20990 case V4DFmode:
20991 if (TARGET_AVX)
20992 gen = gen_avx_blendvpd256;
20993 break;
20994 case V32QImode:
20995 case V16HImode:
20996 case V8SImode:
20997 case V4DImode:
20998 if (TARGET_AVX2)
21000 gen = gen_avx2_pblendvb;
21001 if (mode != V32QImode)
21002 d = gen_reg_rtx (V32QImode);
21003 op_false = gen_lowpart (V32QImode, op_false);
21004 op_true = gen_lowpart (V32QImode, op_true);
21005 cmp = gen_lowpart (V32QImode, cmp);
21007 break;
21009 case V16SImode:
21010 gen = gen_avx512f_blendmv16si;
21011 break;
21012 case V8DImode:
21013 gen = gen_avx512f_blendmv8di;
21014 break;
21015 case V8DFmode:
21016 gen = gen_avx512f_blendmv8df;
21017 break;
21018 case V16SFmode:
21019 gen = gen_avx512f_blendmv16sf;
21020 break;
21022 default:
21023 break;
21026 if (gen != NULL)
21028 emit_insn (gen (d, op_false, op_true, cmp));
21029 if (d != dest)
21030 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
21032 else
21034 op_true = force_reg (mode, op_true);
21036 t2 = gen_reg_rtx (mode);
21037 if (optimize)
21038 t3 = gen_reg_rtx (mode);
21039 else
21040 t3 = dest;
21042 x = gen_rtx_AND (mode, op_true, cmp);
21043 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
21045 x = gen_rtx_NOT (mode, cmp);
21046 x = gen_rtx_AND (mode, x, op_false);
21047 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
21049 x = gen_rtx_IOR (mode, t3, t2);
21050 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21055 /* Expand a floating-point conditional move. Return true if successful. */
21057 bool
21058 ix86_expand_fp_movcc (rtx operands[])
21060 enum machine_mode mode = GET_MODE (operands[0]);
21061 enum rtx_code code = GET_CODE (operands[1]);
21062 rtx tmp, compare_op;
21063 rtx op0 = XEXP (operands[1], 0);
21064 rtx op1 = XEXP (operands[1], 1);
21066 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
21068 enum machine_mode cmode;
21070 /* Since we've no cmove for sse registers, don't force bad register
21071 allocation just to gain access to it. Deny movcc when the
21072 comparison mode doesn't match the move mode. */
21073 cmode = GET_MODE (op0);
21074 if (cmode == VOIDmode)
21075 cmode = GET_MODE (op1);
21076 if (cmode != mode)
21077 return false;
21079 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
21080 if (code == UNKNOWN)
21081 return false;
21083 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
21084 operands[2], operands[3]))
21085 return true;
21087 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
21088 operands[2], operands[3]);
21089 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
21090 return true;
21093 if (GET_MODE (op0) == TImode
21094 || (GET_MODE (op0) == DImode
21095 && !TARGET_64BIT))
21096 return false;
21098 /* The floating point conditional move instructions don't directly
21099 support conditions resulting from a signed integer comparison. */
21101 compare_op = ix86_expand_compare (code, op0, op1);
21102 if (!fcmov_comparison_operator (compare_op, VOIDmode))
21104 tmp = gen_reg_rtx (QImode);
21105 ix86_expand_setcc (tmp, code, op0, op1);
21107 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
21110 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
21111 gen_rtx_IF_THEN_ELSE (mode, compare_op,
21112 operands[2], operands[3])));
21114 return true;
21117 /* Expand a floating-point vector conditional move; a vcond operation
21118 rather than a movcc operation. */
21120 bool
21121 ix86_expand_fp_vcond (rtx operands[])
21123 enum rtx_code code = GET_CODE (operands[3]);
21124 rtx cmp;
21126 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
21127 &operands[4], &operands[5]);
21128 if (code == UNKNOWN)
21130 rtx temp;
21131 switch (GET_CODE (operands[3]))
21133 case LTGT:
21134 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
21135 operands[5], operands[0], operands[0]);
21136 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
21137 operands[5], operands[1], operands[2]);
21138 code = AND;
21139 break;
21140 case UNEQ:
21141 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
21142 operands[5], operands[0], operands[0]);
21143 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
21144 operands[5], operands[1], operands[2]);
21145 code = IOR;
21146 break;
21147 default:
21148 gcc_unreachable ();
21150 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
21151 OPTAB_DIRECT);
21152 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21153 return true;
21156 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
21157 operands[5], operands[1], operands[2]))
21158 return true;
21160 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
21161 operands[1], operands[2]);
21162 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21163 return true;
21166 /* Expand a signed/unsigned integral vector conditional move. */
21168 bool
21169 ix86_expand_int_vcond (rtx operands[])
21171 enum machine_mode data_mode = GET_MODE (operands[0]);
21172 enum machine_mode mode = GET_MODE (operands[4]);
21173 enum rtx_code code = GET_CODE (operands[3]);
21174 bool negate = false;
21175 rtx x, cop0, cop1;
21177 cop0 = operands[4];
21178 cop1 = operands[5];
21180 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
21181 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
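/* E.g. for V4SImode elements the first form becomes a single arithmetic
   right shift by 31 (the sign bit is replicated into every bit) and the
   second a single logical right shift by 31 (only the sign bit survives),
   with no comparison or blend needed.  */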
21182 if ((code == LT || code == GE)
21183 && data_mode == mode
21184 && cop1 == CONST0_RTX (mode)
21185 && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
21186 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
21187 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
21188 && (GET_MODE_SIZE (data_mode) == 16
21189 || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
21191 rtx negop = operands[2 - (code == LT)];
21192 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
21193 if (negop == CONST1_RTX (data_mode))
21195 rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
21196 operands[0], 1, OPTAB_DIRECT);
21197 if (res != operands[0])
21198 emit_move_insn (operands[0], res);
21199 return true;
21201 else if (GET_MODE_INNER (data_mode) != DImode
21202 && vector_all_ones_operand (negop, data_mode))
21204 rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
21205 operands[0], 0, OPTAB_DIRECT);
21206 if (res != operands[0])
21207 emit_move_insn (operands[0], res);
21208 return true;
21212 if (!nonimmediate_operand (cop1, mode))
21213 cop1 = force_reg (mode, cop1);
21214 if (!general_operand (operands[1], data_mode))
21215 operands[1] = force_reg (data_mode, operands[1]);
21216 if (!general_operand (operands[2], data_mode))
21217 operands[2] = force_reg (data_mode, operands[2]);
21219 /* XOP supports all of the comparisons on all 128-bit vector int types. */
21220 if (TARGET_XOP
21221 && (mode == V16QImode || mode == V8HImode
21222 || mode == V4SImode || mode == V2DImode))
21224 else
21226 /* Canonicalize the comparison to EQ, GT, GTU. */
21227 switch (code)
21229 case EQ:
21230 case GT:
21231 case GTU:
21232 break;
21234 case NE:
21235 case LE:
21236 case LEU:
21237 code = reverse_condition (code);
21238 negate = true;
21239 break;
21241 case GE:
21242 case GEU:
21243 code = reverse_condition (code);
21244 negate = true;
21245 /* FALLTHRU */
21247 case LT:
21248 case LTU:
21249 code = swap_condition (code);
21250 x = cop0, cop0 = cop1, cop1 = x;
21251 break;
21253 default:
21254 gcc_unreachable ();
21257 /* Only SSE4.1/SSE4.2 supports V2DImode. */
21258 if (mode == V2DImode)
21260 switch (code)
21262 case EQ:
21263 /* SSE4.1 supports EQ. */
21264 if (!TARGET_SSE4_1)
21265 return false;
21266 break;
21268 case GT:
21269 case GTU:
21270 /* SSE4.2 supports GT/GTU. */
21271 if (!TARGET_SSE4_2)
21272 return false;
21273 break;
21275 default:
21276 gcc_unreachable ();
21280 /* Unsigned parallel compare is not supported by the hardware.
21281 Play some tricks to turn this into a signed comparison
21282 instead. */
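/* Two flavors of that trick are used below.  For 32/64-bit elements,
   bias both operands by the sign bit, roughly

     a >u b   becomes   (a - SIGNBIT) >s (b - SIGNBIT)

   (SIGNBIT being 0x80000000 for 32-bit elements), and use the signed
   PCMPGT.  For 8/16-bit elements, compute an unsigned saturating
   subtraction instead and test it against zero:

     a >u b   is equivalent to   (a -us b) != 0

   which is expressed below as EQ with the result negated.  */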
21283 if (code == GTU)
21285 cop0 = force_reg (mode, cop0);
21287 switch (mode)
21289 case V16SImode:
21290 case V8DImode:
21291 case V8SImode:
21292 case V4DImode:
21293 case V4SImode:
21294 case V2DImode:
21296 rtx t1, t2, mask;
21297 rtx (*gen_sub3) (rtx, rtx, rtx);
21299 switch (mode)
21301 case V16SImode: gen_sub3 = gen_subv16si3; break;
21302 case V8DImode: gen_sub3 = gen_subv8di3; break;
21303 case V8SImode: gen_sub3 = gen_subv8si3; break;
21304 case V4DImode: gen_sub3 = gen_subv4di3; break;
21305 case V4SImode: gen_sub3 = gen_subv4si3; break;
21306 case V2DImode: gen_sub3 = gen_subv2di3; break;
21307 default:
21308 gcc_unreachable ();
21310 /* Subtract (-(INT MAX) - 1) from both operands to make
21311 them signed. */
21312 mask = ix86_build_signbit_mask (mode, true, false);
21313 t1 = gen_reg_rtx (mode);
21314 emit_insn (gen_sub3 (t1, cop0, mask));
21316 t2 = gen_reg_rtx (mode);
21317 emit_insn (gen_sub3 (t2, cop1, mask));
21319 cop0 = t1;
21320 cop1 = t2;
21321 code = GT;
21323 break;
21325 case V32QImode:
21326 case V16HImode:
21327 case V16QImode:
21328 case V8HImode:
21329 /* Perform a parallel unsigned saturating subtraction. */
21330 x = gen_reg_rtx (mode);
21331 emit_insn (gen_rtx_SET (VOIDmode, x,
21332 gen_rtx_US_MINUS (mode, cop0, cop1)));
21334 cop0 = x;
21335 cop1 = CONST0_RTX (mode);
21336 code = EQ;
21337 negate = !negate;
21338 break;
21340 default:
21341 gcc_unreachable ();
21346 /* Allow the comparison to be done in one mode, but the movcc to
21347 happen in another mode. */
21348 if (data_mode == mode)
21350 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
21351 operands[1+negate], operands[2-negate]);
21353 else
21355 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
21356 x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
21357 operands[1+negate], operands[2-negate]);
21358 if (GET_MODE (x) == mode)
21359 x = gen_lowpart (data_mode, x);
21362 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
21363 operands[2-negate]);
21364 return true;
21367 static bool
21368 ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1)
21370 enum machine_mode mode = GET_MODE (op0);
21371 switch (mode)
21373 case V16SImode:
21374 emit_insn (gen_avx512f_vpermi2varv16si3 (target, op0,
21375 force_reg (V16SImode, mask),
21376 op1));
21377 return true;
21378 case V16SFmode:
21379 emit_insn (gen_avx512f_vpermi2varv16sf3 (target, op0,
21380 force_reg (V16SImode, mask),
21381 op1));
21382 return true;
21383 case V8DImode:
21384 emit_insn (gen_avx512f_vpermi2varv8di3 (target, op0,
21385 force_reg (V8DImode, mask), op1));
21386 return true;
21387 case V8DFmode:
21388 emit_insn (gen_avx512f_vpermi2varv8df3 (target, op0,
21389 force_reg (V8DImode, mask), op1));
21390 return true;
21391 default:
21392 return false;
21396 /* Expand a variable vector permutation. */
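/* I.e. each element of TARGET is selected from the concatenation of OP0
   and OP1 by the corresponding element of MASK (taken modulo twice the
   number of elements), where MASK is a variable vector rather than a
   compile-time constant.  */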
21398 void
21399 ix86_expand_vec_perm (rtx operands[])
21401 rtx target = operands[0];
21402 rtx op0 = operands[1];
21403 rtx op1 = operands[2];
21404 rtx mask = operands[3];
21405 rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
21406 enum machine_mode mode = GET_MODE (op0);
21407 enum machine_mode maskmode = GET_MODE (mask);
21408 int w, e, i;
21409 bool one_operand_shuffle = rtx_equal_p (op0, op1);
21411 /* Number of elements in the vector. */
21412 w = GET_MODE_NUNITS (mode);
21413 e = GET_MODE_UNIT_SIZE (mode);
21414 gcc_assert (w <= 64);
21416 if (ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1))
21417 return;
21419 if (TARGET_AVX2)
21421 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
21423 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
21424 a constant shuffle operand.  With a tiny bit of effort we can
21425 use VPERMD instead.  A re-interpretation stall for V4DFmode is
21426 unfortunate but there's no avoiding it.
21427 Similarly, for V16HImode we don't have instructions for variable
21428 shuffling, while for V32QImode we can, after preparing suitable
21429 masks, use vpshufb; vpshufb; vpermq; vpor. */
21431 if (mode == V16HImode)
21433 maskmode = mode = V32QImode;
21434 w = 32;
21435 e = 1;
21437 else
21439 maskmode = mode = V8SImode;
21440 w = 8;
21441 e = 4;
21443 t1 = gen_reg_rtx (maskmode);
21445 /* Replicate the low bits of the V4DImode mask into V8SImode:
21446 mask = { A B C D }
21447 t1 = { A A B B C C D D }. */
21448 for (i = 0; i < w / 2; ++i)
21449 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
21450 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21451 vt = force_reg (maskmode, vt);
21452 mask = gen_lowpart (maskmode, mask);
21453 if (maskmode == V8SImode)
21454 emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
21455 else
21456 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
21458 /* Multiply the shuffle indices by two. */
21459 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
21460 OPTAB_DIRECT);
21462 /* Add one to the odd shuffle indices:
21463 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
21464 for (i = 0; i < w / 2; ++i)
21466 vec[i * 2] = const0_rtx;
21467 vec[i * 2 + 1] = const1_rtx;
21469 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21470 vt = validize_mem (force_const_mem (maskmode, vt));
21471 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
21472 OPTAB_DIRECT);
21474 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
21475 operands[3] = mask = t1;
21476 target = gen_reg_rtx (mode);
21477 op0 = gen_lowpart (mode, op0);
21478 op1 = gen_lowpart (mode, op1);
21481 switch (mode)
21483 case V8SImode:
21484 /* The VPERMD and VPERMPS instructions already properly ignore
21485 the high bits of the shuffle elements. No need for us to
21486 perform an AND ourselves. */
21487 if (one_operand_shuffle)
21489 emit_insn (gen_avx2_permvarv8si (target, op0, mask));
21490 if (target != operands[0])
21491 emit_move_insn (operands[0],
21492 gen_lowpart (GET_MODE (operands[0]), target));
21494 else
21496 t1 = gen_reg_rtx (V8SImode);
21497 t2 = gen_reg_rtx (V8SImode);
21498 emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
21499 emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
21500 goto merge_two;
21502 return;
21504 case V8SFmode:
21505 mask = gen_lowpart (V8SImode, mask);
21506 if (one_operand_shuffle)
21507 emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
21508 else
21510 t1 = gen_reg_rtx (V8SFmode);
21511 t2 = gen_reg_rtx (V8SFmode);
21512 emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
21513 emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
21514 goto merge_two;
21516 return;
21518 case V4SImode:
21519 /* By combining the two 128-bit input vectors into one 256-bit
21520 input vector, we can use VPERMD and VPERMPS for the full
21521 two-operand shuffle. */
21522 t1 = gen_reg_rtx (V8SImode);
21523 t2 = gen_reg_rtx (V8SImode);
21524 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
21525 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
21526 emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
21527 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
21528 return;
21530 case V4SFmode:
21531 t1 = gen_reg_rtx (V8SFmode);
21532 t2 = gen_reg_rtx (V8SImode);
21533 mask = gen_lowpart (V4SImode, mask);
21534 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
21535 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
21536 emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
21537 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
21538 return;
21540 case V32QImode:
21541 t1 = gen_reg_rtx (V32QImode);
21542 t2 = gen_reg_rtx (V32QImode);
21543 t3 = gen_reg_rtx (V32QImode);
21544 vt2 = GEN_INT (128);
21545 for (i = 0; i < 32; i++)
21546 vec[i] = vt2;
21547 vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
21548 vt = force_reg (V32QImode, vt);
21549 for (i = 0; i < 32; i++)
21550 vec[i] = i < 16 ? vt2 : const0_rtx;
21551 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
21552 vt2 = force_reg (V32QImode, vt2);
21553 /* From mask create two adjusted masks, which contain the same
21554 bits as mask in the low 7 bits of each vector element.
21555 The first mask will have the most significant bit clear
21556 if it requests element from the same 128-bit lane
21557 and MSB set if it requests element from the other 128-bit lane.
21558 The second mask will have the opposite values of the MSB,
21559 and additionally will have its 128-bit lanes swapped.
21560 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
21561 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
21562 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
21563 stands for other 12 bytes. */
21564 /* The bit that says whether an element comes from the same lane or the
21565 other lane is bit 4, so shift it up by 3 to the MSB position. */
21566 t5 = gen_reg_rtx (V4DImode);
21567 emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
21568 GEN_INT (3)));
21569 /* Clear MSB bits from the mask just in case it had them set. */
21570 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
21571 /* After this t1 will have MSB set for elements from other lane. */
21572 emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
21573 /* Clear bits other than MSB. */
21574 emit_insn (gen_andv32qi3 (t1, t1, vt));
21575 /* Or in the lower bits from mask into t3. */
21576 emit_insn (gen_iorv32qi3 (t3, t1, t2));
21577 /* And invert MSB bits in t1, so MSB is set for elements from the same
21578 lane. */
21579 emit_insn (gen_xorv32qi3 (t1, t1, vt));
21580 /* Swap 128-bit lanes in t3. */
21581 t6 = gen_reg_rtx (V4DImode);
21582 emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
21583 const2_rtx, GEN_INT (3),
21584 const0_rtx, const1_rtx));
21585 /* And or in the lower bits from mask into t1. */
21586 emit_insn (gen_iorv32qi3 (t1, t1, t2));
21587 if (one_operand_shuffle)
21589 /* Each of these shuffles will put 0s in places where
21590 element from the other 128-bit lane is needed, otherwise
21591 will shuffle in the requested value. */
21592 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
21593 gen_lowpart (V32QImode, t6)));
21594 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
21595 /* For t3 the 128-bit lanes are swapped again. */
21596 t7 = gen_reg_rtx (V4DImode);
21597 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
21598 const2_rtx, GEN_INT (3),
21599 const0_rtx, const1_rtx));
21600 /* And oring both together leads to the result. */
21601 emit_insn (gen_iorv32qi3 (target, t1,
21602 gen_lowpart (V32QImode, t7)));
21603 if (target != operands[0])
21604 emit_move_insn (operands[0],
21605 gen_lowpart (GET_MODE (operands[0]), target));
21606 return;
21609 t4 = gen_reg_rtx (V32QImode);
21610 /* Similar to the one_operand_shuffle code above, just repeated
21611 twice, once for each operand.  The code after the merge_two:
21612 label will merge the two results together. */
21613 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
21614 gen_lowpart (V32QImode, t6)));
21615 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
21616 gen_lowpart (V32QImode, t6)));
21617 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
21618 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
21619 t7 = gen_reg_rtx (V4DImode);
21620 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
21621 const2_rtx, GEN_INT (3),
21622 const0_rtx, const1_rtx));
21623 t8 = gen_reg_rtx (V4DImode);
21624 emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
21625 const2_rtx, GEN_INT (3),
21626 const0_rtx, const1_rtx));
21627 emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
21628 emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
21629 t1 = t4;
21630 t2 = t3;
21631 goto merge_two;
21633 default:
21634 gcc_assert (GET_MODE_SIZE (mode) <= 16);
21635 break;
21639 if (TARGET_XOP)
21641 /* The XOP VPPERM insn supports three inputs. By ignoring the
21642 one_operand_shuffle special case, we avoid creating another
21643 set of constant vectors in memory. */
21644 one_operand_shuffle = false;
21646 /* mask = mask & {2*w-1, ...} */
21647 vt = GEN_INT (2*w - 1);
21649 else
21651 /* mask = mask & {w-1, ...} */
21652 vt = GEN_INT (w - 1);
21655 for (i = 0; i < w; i++)
21656 vec[i] = vt;
21657 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21658 mask = expand_simple_binop (maskmode, AND, mask, vt,
21659 NULL_RTX, 0, OPTAB_DIRECT);
21661 /* For non-QImode operations, convert the word permutation control
21662 into a byte permutation control. */
21663 if (mode != V16QImode)
21665 mask = expand_simple_binop (maskmode, ASHIFT, mask,
21666 GEN_INT (exact_log2 (e)),
21667 NULL_RTX, 0, OPTAB_DIRECT);
21669 /* Convert mask to vector of chars. */
21670 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
21672 /* Replicate each of the input bytes into byte positions:
21673 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
21674 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
21675 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
21676 for (i = 0; i < 16; ++i)
21677 vec[i] = GEN_INT (i/e * e);
21678 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
21679 vt = validize_mem (force_const_mem (V16QImode, vt));
21680 if (TARGET_XOP)
21681 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
21682 else
21683 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
21685 /* Convert it into the byte positions by doing
21686 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
21687 for (i = 0; i < 16; ++i)
21688 vec[i] = GEN_INT (i % e);
21689 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
21690 vt = validize_mem (force_const_mem (V16QImode, vt));
21691 emit_insn (gen_addv16qi3 (mask, mask, vt));
21694 /* The actual shuffle operations all operate on V16QImode. */
21695 op0 = gen_lowpart (V16QImode, op0);
21696 op1 = gen_lowpart (V16QImode, op1);
21698 if (TARGET_XOP)
21700 if (GET_MODE (target) != V16QImode)
21701 target = gen_reg_rtx (V16QImode);
21702 emit_insn (gen_xop_pperm (target, op0, op1, mask));
21703 if (target != operands[0])
21704 emit_move_insn (operands[0],
21705 gen_lowpart (GET_MODE (operands[0]), target));
21707 else if (one_operand_shuffle)
21709 if (GET_MODE (target) != V16QImode)
21710 target = gen_reg_rtx (V16QImode);
21711 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
21712 if (target != operands[0])
21713 emit_move_insn (operands[0],
21714 gen_lowpart (GET_MODE (operands[0]), target));
21716 else
21718 rtx xops[6];
21719 bool ok;
21721 /* Shuffle the two input vectors independently. */
21722 t1 = gen_reg_rtx (V16QImode);
21723 t2 = gen_reg_rtx (V16QImode);
21724 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
21725 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
21727 merge_two:
21728 /* Then merge them together. The key is whether any given control
21729 element contained a bit set that indicates the second word. */
21730 mask = operands[3];
21731 vt = GEN_INT (w);
21732 if (maskmode == V2DImode && !TARGET_SSE4_1)
21734 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
21735 more shuffle to convert the V2DI input mask into a V4SI
21736 input mask.  At which point the masking that expand_int_vcond
21737 performs will work as desired. */
21738 rtx t3 = gen_reg_rtx (V4SImode);
21739 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
21740 const0_rtx, const0_rtx,
21741 const2_rtx, const2_rtx));
21742 mask = t3;
21743 maskmode = V4SImode;
21744 e = w = 4;
21747 for (i = 0; i < w; i++)
21748 vec[i] = vt;
21749 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21750 vt = force_reg (maskmode, vt);
21751 mask = expand_simple_binop (maskmode, AND, mask, vt,
21752 NULL_RTX, 0, OPTAB_DIRECT);
21754 if (GET_MODE (target) != mode)
21755 target = gen_reg_rtx (mode);
21756 xops[0] = target;
21757 xops[1] = gen_lowpart (mode, t2);
21758 xops[2] = gen_lowpart (mode, t1);
21759 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
21760 xops[4] = mask;
21761 xops[5] = vt;
21762 ok = ix86_expand_int_vcond (xops);
21763 gcc_assert (ok);
21764 if (target != operands[0])
21765 emit_move_insn (operands[0],
21766 gen_lowpart (GET_MODE (operands[0]), target));
21770 /* Unpack SRC into DEST, which has the next wider integer vector type.
21771 UNSIGNED_P is true if we should do zero extension, else sign extension.
21772 HIGH_P is true if we want the N/2 high elements, else the low elements. */
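/* E.g. sign-extending the low half of a V8HImode SRC into a V4SImode
   DEST: with SSE4.1 this is a single PMOVSXWD; without it, the code below
   computes a sign mask as (0 > SRC) and interleaves SRC with that mask,
   so each 16-bit element is followed by 16 copies of its sign bit.  */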
21774 void
21775 ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
21777 enum machine_mode imode = GET_MODE (src);
21778 rtx tmp;
21780 if (TARGET_SSE4_1)
21782 rtx (*unpack)(rtx, rtx);
21783 rtx (*extract)(rtx, rtx) = NULL;
21784 enum machine_mode halfmode = BLKmode;
21786 switch (imode)
21788 case V32QImode:
21789 if (unsigned_p)
21790 unpack = gen_avx2_zero_extendv16qiv16hi2;
21791 else
21792 unpack = gen_avx2_sign_extendv16qiv16hi2;
21793 halfmode = V16QImode;
21794 extract
21795 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
21796 break;
21797 case V32HImode:
21798 if (unsigned_p)
21799 unpack = gen_avx512f_zero_extendv16hiv16si2;
21800 else
21801 unpack = gen_avx512f_sign_extendv16hiv16si2;
21802 halfmode = V16HImode;
21803 extract
21804 = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
21805 break;
21806 case V16HImode:
21807 if (unsigned_p)
21808 unpack = gen_avx2_zero_extendv8hiv8si2;
21809 else
21810 unpack = gen_avx2_sign_extendv8hiv8si2;
21811 halfmode = V8HImode;
21812 extract
21813 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
21814 break;
21815 case V16SImode:
21816 if (unsigned_p)
21817 unpack = gen_avx512f_zero_extendv8siv8di2;
21818 else
21819 unpack = gen_avx512f_sign_extendv8siv8di2;
21820 halfmode = V8SImode;
21821 extract
21822 = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
21823 break;
21824 case V8SImode:
21825 if (unsigned_p)
21826 unpack = gen_avx2_zero_extendv4siv4di2;
21827 else
21828 unpack = gen_avx2_sign_extendv4siv4di2;
21829 halfmode = V4SImode;
21830 extract
21831 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
21832 break;
21833 case V16QImode:
21834 if (unsigned_p)
21835 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
21836 else
21837 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
21838 break;
21839 case V8HImode:
21840 if (unsigned_p)
21841 unpack = gen_sse4_1_zero_extendv4hiv4si2;
21842 else
21843 unpack = gen_sse4_1_sign_extendv4hiv4si2;
21844 break;
21845 case V4SImode:
21846 if (unsigned_p)
21847 unpack = gen_sse4_1_zero_extendv2siv2di2;
21848 else
21849 unpack = gen_sse4_1_sign_extendv2siv2di2;
21850 break;
21851 default:
21852 gcc_unreachable ();
21855 if (GET_MODE_SIZE (imode) >= 32)
21857 tmp = gen_reg_rtx (halfmode);
21858 emit_insn (extract (tmp, src));
21860 else if (high_p)
21862 /* Shift higher 8 bytes to lower 8 bytes. */
21863 tmp = gen_reg_rtx (V1TImode);
21864 emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
21865 GEN_INT (64)));
21866 tmp = gen_lowpart (imode, tmp);
21868 else
21869 tmp = src;
21871 emit_insn (unpack (dest, tmp));
21873 else
21875 rtx (*unpack)(rtx, rtx, rtx);
21877 switch (imode)
21879 case V16QImode:
21880 if (high_p)
21881 unpack = gen_vec_interleave_highv16qi;
21882 else
21883 unpack = gen_vec_interleave_lowv16qi;
21884 break;
21885 case V8HImode:
21886 if (high_p)
21887 unpack = gen_vec_interleave_highv8hi;
21888 else
21889 unpack = gen_vec_interleave_lowv8hi;
21890 break;
21891 case V4SImode:
21892 if (high_p)
21893 unpack = gen_vec_interleave_highv4si;
21894 else
21895 unpack = gen_vec_interleave_lowv4si;
21896 break;
21897 default:
21898 gcc_unreachable ();
21901 if (unsigned_p)
21902 tmp = force_reg (imode, CONST0_RTX (imode));
21903 else
21904 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
21905 src, pc_rtx, pc_rtx);
21907 rtx tmp2 = gen_reg_rtx (imode);
21908 emit_insn (unpack (tmp2, src, tmp));
21909 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
21913 /* Expand conditional increment or decrement using adc/sbb instructions.
21914 The default case using setcc followed by the conditional move can be
21915 done by generic code. */
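/* For example, for an unsigned comparison

     x = (a < b) ? y + 1 : y;

   the compare is arranged so that the condition ends up in the carry
   flag, after which the increment is a single adc with a zero immediate
   (y = y + 0 + CF); the decrement case uses sbb in the same way.  */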
21916 bool
21917 ix86_expand_int_addcc (rtx operands[])
21919 enum rtx_code code = GET_CODE (operands[1]);
21920 rtx flags;
21921 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
21922 rtx compare_op;
21923 rtx val = const0_rtx;
21924 bool fpcmp = false;
21925 enum machine_mode mode;
21926 rtx op0 = XEXP (operands[1], 0);
21927 rtx op1 = XEXP (operands[1], 1);
21929 if (operands[3] != const1_rtx
21930 && operands[3] != constm1_rtx)
21931 return false;
21932 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
21933 return false;
21934 code = GET_CODE (compare_op);
21936 flags = XEXP (compare_op, 0);
21938 if (GET_MODE (flags) == CCFPmode
21939 || GET_MODE (flags) == CCFPUmode)
21941 fpcmp = true;
21942 code = ix86_fp_compare_code_to_integer (code);
21945 if (code != LTU)
21947 val = constm1_rtx;
21948 if (fpcmp)
21949 PUT_CODE (compare_op,
21950 reverse_condition_maybe_unordered
21951 (GET_CODE (compare_op)));
21952 else
21953 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
21956 mode = GET_MODE (operands[0]);
21958 /* Construct either adc or sbb insn. */
21959 if ((code == LTU) == (operands[3] == constm1_rtx))
21961 switch (mode)
21963 case QImode:
21964 insn = gen_subqi3_carry;
21965 break;
21966 case HImode:
21967 insn = gen_subhi3_carry;
21968 break;
21969 case SImode:
21970 insn = gen_subsi3_carry;
21971 break;
21972 case DImode:
21973 insn = gen_subdi3_carry;
21974 break;
21975 default:
21976 gcc_unreachable ();
21979 else
21981 switch (mode)
21983 case QImode:
21984 insn = gen_addqi3_carry;
21985 break;
21986 case HImode:
21987 insn = gen_addhi3_carry;
21988 break;
21989 case SImode:
21990 insn = gen_addsi3_carry;
21991 break;
21992 case DImode:
21993 insn = gen_adddi3_carry;
21994 break;
21995 default:
21996 gcc_unreachable ();
21999 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
22001 return true;
22005 /* Split operands 0 and 1 into half-mode parts.  Similar to split_double_mode,
22006 but works for floating point parameters and non-offsettable memories.
22007 For pushes, it returns just stack offsets; the values will be saved
22008 in the right order.  At most four parts are generated. */
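/* Roughly: with !TARGET_64BIT a DFmode operand yields two SImode parts,
   XFmode three and TFmode four, while with TARGET_64BIT an XFmode or
   TFmode operand yields a DImode low part plus an SImode (XFmode) or
   DImode (TFmode) upper part.  */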
22010 static int
22011 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
22013 int size;
22015 if (!TARGET_64BIT)
22016 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
22017 else
22018 size = (GET_MODE_SIZE (mode) + 4) / 8;
22020 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
22021 gcc_assert (size >= 2 && size <= 4);
22023 /* Optimize constant pool reference to immediates. This is used by fp
22024 moves, that force all constants to memory to allow combining. */
22025 if (MEM_P (operand) && MEM_READONLY_P (operand))
22027 rtx tmp = maybe_get_pool_constant (operand);
22028 if (tmp)
22029 operand = tmp;
22032 if (MEM_P (operand) && !offsettable_memref_p (operand))
22034 /* The only non-offsettable memories we handle are pushes. */
22035 int ok = push_operand (operand, VOIDmode);
22037 gcc_assert (ok);
22039 operand = copy_rtx (operand);
22040 PUT_MODE (operand, word_mode);
22041 parts[0] = parts[1] = parts[2] = parts[3] = operand;
22042 return size;
22045 if (GET_CODE (operand) == CONST_VECTOR)
22047 enum machine_mode imode = int_mode_for_mode (mode);
22048 /* Caution: if we looked through a constant pool memory above,
22049 the operand may actually have a different mode now. That's
22050 ok, since we want to pun this all the way back to an integer. */
22051 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
22052 gcc_assert (operand != NULL);
22053 mode = imode;
22056 if (!TARGET_64BIT)
22058 if (mode == DImode)
22059 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22060 else
22062 int i;
22064 if (REG_P (operand))
22066 gcc_assert (reload_completed);
22067 for (i = 0; i < size; i++)
22068 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
22070 else if (offsettable_memref_p (operand))
22072 operand = adjust_address (operand, SImode, 0);
22073 parts[0] = operand;
22074 for (i = 1; i < size; i++)
22075 parts[i] = adjust_address (operand, SImode, 4 * i);
22077 else if (GET_CODE (operand) == CONST_DOUBLE)
22079 REAL_VALUE_TYPE r;
22080 long l[4];
22082 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22083 switch (mode)
22085 case TFmode:
22086 real_to_target (l, &r, mode);
22087 parts[3] = gen_int_mode (l[3], SImode);
22088 parts[2] = gen_int_mode (l[2], SImode);
22089 break;
22090 case XFmode:
22091 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
22092 long double may not be 80-bit. */
22093 real_to_target (l, &r, mode);
22094 parts[2] = gen_int_mode (l[2], SImode);
22095 break;
22096 case DFmode:
22097 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
22098 break;
22099 default:
22100 gcc_unreachable ();
22102 parts[1] = gen_int_mode (l[1], SImode);
22103 parts[0] = gen_int_mode (l[0], SImode);
22105 else
22106 gcc_unreachable ();
22109 else
22111 if (mode == TImode)
22112 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22113 if (mode == XFmode || mode == TFmode)
22115 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
22116 if (REG_P (operand))
22118 gcc_assert (reload_completed);
22119 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
22120 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
22122 else if (offsettable_memref_p (operand))
22124 operand = adjust_address (operand, DImode, 0);
22125 parts[0] = operand;
22126 parts[1] = adjust_address (operand, upper_mode, 8);
22128 else if (GET_CODE (operand) == CONST_DOUBLE)
22130 REAL_VALUE_TYPE r;
22131 long l[4];
22133 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22134 real_to_target (l, &r, mode);
22136 /* Do not use shift by 32 to avoid warning on 32bit systems. */
22137 if (HOST_BITS_PER_WIDE_INT >= 64)
22138 parts[0]
22139 = gen_int_mode
22140 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
22141 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
22142 DImode);
22143 else
22144 parts[0] = immed_double_const (l[0], l[1], DImode);
22146 if (upper_mode == SImode)
22147 parts[1] = gen_int_mode (l[2], SImode);
22148 else if (HOST_BITS_PER_WIDE_INT >= 64)
22149 parts[1]
22150 = gen_int_mode
22151 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
22152 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
22153 DImode);
22154 else
22155 parts[1] = immed_double_const (l[2], l[3], DImode);
22157 else
22158 gcc_unreachable ();
22162 return size;
22165 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
22166 Return false when normal moves are needed; true when all required
22167 insns have been emitted. Operands 2-4 contain the input values
22168 in the correct order; operands 5-7 contain the output values. */
22170 void
22171 ix86_split_long_move (rtx operands[])
22173 rtx part[2][4];
22174 int nparts, i, j;
22175 int push = 0;
22176 int collisions = 0;
22177 enum machine_mode mode = GET_MODE (operands[0]);
22178 bool collisionparts[4];
22180 /* The DFmode expanders may ask us to move a double.
22181 For a 64-bit target this is a single move.  By hiding that fact
22182 here we simplify the i386.md splitters. */
22183 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
22185 /* Optimize constant pool reference to immediates. This is used by
22186 fp moves, that force all constants to memory to allow combining. */
22188 if (MEM_P (operands[1])
22189 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
22190 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
22191 operands[1] = get_pool_constant (XEXP (operands[1], 0));
22192 if (push_operand (operands[0], VOIDmode))
22194 operands[0] = copy_rtx (operands[0]);
22195 PUT_MODE (operands[0], word_mode);
22197 else
22198 operands[0] = gen_lowpart (DImode, operands[0]);
22199 operands[1] = gen_lowpart (DImode, operands[1]);
22200 emit_move_insn (operands[0], operands[1]);
22201 return;
22204 /* The only non-offsettable memory we handle is push. */
22205 if (push_operand (operands[0], VOIDmode))
22206 push = 1;
22207 else
22208 gcc_assert (!MEM_P (operands[0])
22209 || offsettable_memref_p (operands[0]));
22211 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
22212 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
22214 /* When emitting push, take care for source operands on the stack. */
22215 if (push && MEM_P (operands[1])
22216 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
22218 rtx src_base = XEXP (part[1][nparts - 1], 0);
22220 /* Compensate for the stack decrement by 4. */
22221 if (!TARGET_64BIT && nparts == 3
22222 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
22223 src_base = plus_constant (Pmode, src_base, 4);
22225 /* src_base refers to the stack pointer and is
22226 automatically decreased by emitted push. */
22227 for (i = 0; i < nparts; i++)
22228 part[1][i] = change_address (part[1][i],
22229 GET_MODE (part[1][i]), src_base);
22232 /* We need to do copy in the right order in case an address register
22233 of the source overlaps the destination. */
22234 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
22236 rtx tmp;
22238 for (i = 0; i < nparts; i++)
22240 collisionparts[i]
22241 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
22242 if (collisionparts[i])
22243 collisions++;
22246 /* Collision in the middle part can be handled by reordering. */
22247 if (collisions == 1 && nparts == 3 && collisionparts [1])
22249 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
22250 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
22252 else if (collisions == 1
22253 && nparts == 4
22254 && (collisionparts [1] || collisionparts [2]))
22256 if (collisionparts [1])
22258 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
22259 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
22261 else
22263 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
22264 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
22268 /* If there are more collisions, we can't handle it by reordering.
22269 Do an lea to the last part and use only one colliding move. */
22270 else if (collisions > 1)
22272 rtx base;
22274 collisions = 1;
22276 base = part[0][nparts - 1];
22278 /* Handle the case when the last part isn't valid for lea.
22279 Happens in 64-bit mode storing the 12-byte XFmode. */
22280 if (GET_MODE (base) != Pmode)
22281 base = gen_rtx_REG (Pmode, REGNO (base));
22283 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
22284 part[1][0] = replace_equiv_address (part[1][0], base);
22285 for (i = 1; i < nparts; i++)
22287 tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
22288 part[1][i] = replace_equiv_address (part[1][i], tmp);
22293 if (push)
22295 if (!TARGET_64BIT)
22297 if (nparts == 3)
22299 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
22300 emit_insn (ix86_gen_add3 (stack_pointer_rtx,
22301 stack_pointer_rtx, GEN_INT (-4)));
22302 emit_move_insn (part[0][2], part[1][2]);
22304 else if (nparts == 4)
22306 emit_move_insn (part[0][3], part[1][3]);
22307 emit_move_insn (part[0][2], part[1][2]);
22310 else
22312 /* In 64-bit mode we don't have a 32-bit push available.  If this is
22313 a register, that is OK - we will just use the larger counterpart.  We also
22314 retype memory - this comes from an attempt to avoid a REX prefix on
22315 moving the second half of a TFmode value. */
22316 if (GET_MODE (part[1][1]) == SImode)
22318 switch (GET_CODE (part[1][1]))
22320 case MEM:
22321 part[1][1] = adjust_address (part[1][1], DImode, 0);
22322 break;
22324 case REG:
22325 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
22326 break;
22328 default:
22329 gcc_unreachable ();
22332 if (GET_MODE (part[1][0]) == SImode)
22333 part[1][0] = part[1][1];
22336 emit_move_insn (part[0][1], part[1][1]);
22337 emit_move_insn (part[0][0], part[1][0]);
22338 return;
22341 /* Choose correct order to not overwrite the source before it is copied. */
22342 if ((REG_P (part[0][0])
22343 && REG_P (part[1][1])
22344 && (REGNO (part[0][0]) == REGNO (part[1][1])
22345 || (nparts == 3
22346 && REGNO (part[0][0]) == REGNO (part[1][2]))
22347 || (nparts == 4
22348 && REGNO (part[0][0]) == REGNO (part[1][3]))))
22349 || (collisions > 0
22350 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
22352 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
22354 operands[2 + i] = part[0][j];
22355 operands[6 + i] = part[1][j];
22358 else
22360 for (i = 0; i < nparts; i++)
22362 operands[2 + i] = part[0][i];
22363 operands[6 + i] = part[1][i];
22367 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
22368 if (optimize_insn_for_size_p ())
22370 for (j = 0; j < nparts - 1; j++)
22371 if (CONST_INT_P (operands[6 + j])
22372 && operands[6 + j] != const0_rtx
22373 && REG_P (operands[2 + j]))
22374 for (i = j; i < nparts - 1; i++)
22375 if (CONST_INT_P (operands[7 + i])
22376 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
22377 operands[7 + i] = operands[2 + j];
22380 for (i = 0; i < nparts; i++)
22381 emit_move_insn (operands[2 + i], operands[6 + i]);
22383 return;
22386 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
22387 left shift by a constant, either using a single shift or
22388 a sequence of add instructions. */
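/* E.g. a left shift by 2 may come out as two self-additions,

     addl %eax, %eax
     addl %eax, %eax

   when two adds are no more costly than one shift-by-constant and we are
   not optimizing for size; otherwise a single shift insn is emitted.  */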
22390 static void
22391 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
22393 rtx (*insn)(rtx, rtx, rtx);
22395 if (count == 1
22396 || (count * ix86_cost->add <= ix86_cost->shift_const
22397 && !optimize_insn_for_size_p ()))
22399 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
22400 while (count-- > 0)
22401 emit_insn (insn (operand, operand, operand));
22403 else
22405 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
22406 emit_insn (insn (operand, operand, GEN_INT (count)));
22410 void
22411 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
22413 rtx (*gen_ashl3)(rtx, rtx, rtx);
22414 rtx (*gen_shld)(rtx, rtx, rtx);
22415 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22417 rtx low[2], high[2];
22418 int count;
22420 if (CONST_INT_P (operands[2]))
22422 split_double_mode (mode, operands, 2, low, high);
22423 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22425 if (count >= half_width)
22427 emit_move_insn (high[0], low[1]);
22428 emit_move_insn (low[0], const0_rtx);
22430 if (count > half_width)
22431 ix86_expand_ashl_const (high[0], count - half_width, mode);
22433 else
22435 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
22437 if (!rtx_equal_p (operands[0], operands[1]))
22438 emit_move_insn (operands[0], operands[1]);
22440 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
22441 ix86_expand_ashl_const (low[0], count, mode);
22443 return;
22446 split_double_mode (mode, operands, 1, low, high);
22448 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
22450 if (operands[1] == const1_rtx)
22452 /* Assuming we've chosen QImode-capable registers, 1 << N
22453 can be done with two 32/64-bit shifts, no branches, no cmoves. */
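/* A sketch of the branch-free sequence for the DImode case: clear both
   halves, test bit 5 of the shift count, use sete/setne to drop the
   single 1 bit into the correct half, then shift both halves left by the
   count (which the 32-bit shifter masks to 0..31); roughly:

     low  = (count & 32) == 0;
     high = (count & 32) != 0;
     low  <<= count;
     high <<= count;  */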
22454 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
22456 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
22458 ix86_expand_clear (low[0]);
22459 ix86_expand_clear (high[0]);
22460 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
22462 d = gen_lowpart (QImode, low[0]);
22463 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
22464 s = gen_rtx_EQ (QImode, flags, const0_rtx);
22465 emit_insn (gen_rtx_SET (VOIDmode, d, s));
22467 d = gen_lowpart (QImode, high[0]);
22468 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
22469 s = gen_rtx_NE (QImode, flags, const0_rtx);
22470 emit_insn (gen_rtx_SET (VOIDmode, d, s));
22473 /* Otherwise, we can get the same results by manually performing
22474 a bit extract operation on bit 5/6, and then performing the two
22475 shifts. The two methods of getting 0/1 into low/high are exactly
22476 the same size. Avoiding the shift in the bit extract case helps
22477 pentium4 a bit; no one else seems to care much either way. */
22478 else
22480 enum machine_mode half_mode;
22481 rtx (*gen_lshr3)(rtx, rtx, rtx);
22482 rtx (*gen_and3)(rtx, rtx, rtx);
22483 rtx (*gen_xor3)(rtx, rtx, rtx);
22484 HOST_WIDE_INT bits;
22485 rtx x;
22487 if (mode == DImode)
22489 half_mode = SImode;
22490 gen_lshr3 = gen_lshrsi3;
22491 gen_and3 = gen_andsi3;
22492 gen_xor3 = gen_xorsi3;
22493 bits = 5;
22495 else
22497 half_mode = DImode;
22498 gen_lshr3 = gen_lshrdi3;
22499 gen_and3 = gen_anddi3;
22500 gen_xor3 = gen_xordi3;
22501 bits = 6;
22504 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
22505 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
22506 else
22507 x = gen_lowpart (half_mode, operands[2]);
22508 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
22510 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
22511 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
22512 emit_move_insn (low[0], high[0]);
22513 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
22516 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
22517 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
22518 return;
22521 if (operands[1] == constm1_rtx)
22523 /* For -1 << N, we can avoid the shld instruction, because we
22524 know that we're shifting 0...31/63 ones into a -1. */
22525 emit_move_insn (low[0], constm1_rtx);
22526 if (optimize_insn_for_size_p ())
22527 emit_move_insn (high[0], low[0]);
22528 else
22529 emit_move_insn (high[0], constm1_rtx);
22531 else
22533 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
22535 if (!rtx_equal_p (operands[0], operands[1]))
22536 emit_move_insn (operands[0], operands[1]);
22538 split_double_mode (mode, operands, 1, low, high);
22539 emit_insn (gen_shld (high[0], low[0], operands[2]));
22542 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
22544 if (TARGET_CMOVE && scratch)
22546 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22547 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22549 ix86_expand_clear (scratch);
22550 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
22552 else
22554 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
22555 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
22557 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
22561 void
22562 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
22564 rtx (*gen_ashr3)(rtx, rtx, rtx)
22565 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
22566 rtx (*gen_shrd)(rtx, rtx, rtx);
22567 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22569 rtx low[2], high[2];
22570 int count;
22572 if (CONST_INT_P (operands[2]))
22574 split_double_mode (mode, operands, 2, low, high);
22575 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22577 if (count == GET_MODE_BITSIZE (mode) - 1)
22579 emit_move_insn (high[0], high[1]);
22580 emit_insn (gen_ashr3 (high[0], high[0],
22581 GEN_INT (half_width - 1)));
22582 emit_move_insn (low[0], high[0]);
22585 else if (count >= half_width)
22587 emit_move_insn (low[0], high[1]);
22588 emit_move_insn (high[0], low[0]);
22589 emit_insn (gen_ashr3 (high[0], high[0],
22590 GEN_INT (half_width - 1)));
22592 if (count > half_width)
22593 emit_insn (gen_ashr3 (low[0], low[0],
22594 GEN_INT (count - half_width)));
22596 else
22598 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22600 if (!rtx_equal_p (operands[0], operands[1]))
22601 emit_move_insn (operands[0], operands[1]);
22603 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
22604 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
22607 else
22609 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22611 if (!rtx_equal_p (operands[0], operands[1]))
22612 emit_move_insn (operands[0], operands[1]);
22614 split_double_mode (mode, operands, 1, low, high);
22616 emit_insn (gen_shrd (low[0], high[0], operands[2]));
22617 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
22619 if (TARGET_CMOVE && scratch)
22621 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22622 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22624 emit_move_insn (scratch, high[0]);
22625 emit_insn (gen_ashr3 (scratch, scratch,
22626 GEN_INT (half_width - 1)));
22627 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
22628 scratch));
22630 else
22632 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
22633 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
22635 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
22640 void
22641 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
22643 rtx (*gen_lshr3)(rtx, rtx, rtx)
22644 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
22645 rtx (*gen_shrd)(rtx, rtx, rtx);
22646 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22648 rtx low[2], high[2];
22649 int count;
22651 if (CONST_INT_P (operands[2]))
22653 split_double_mode (mode, operands, 2, low, high);
22654 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22656 if (count >= half_width)
22658 emit_move_insn (low[0], high[1]);
22659 ix86_expand_clear (high[0]);
22661 if (count > half_width)
22662 emit_insn (gen_lshr3 (low[0], low[0],
22663 GEN_INT (count - half_width)));
22665 else
22667 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22669 if (!rtx_equal_p (operands[0], operands[1]))
22670 emit_move_insn (operands[0], operands[1]);
22672 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
22673 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
22676 else
22678 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22680 if (!rtx_equal_p (operands[0], operands[1]))
22681 emit_move_insn (operands[0], operands[1]);
22683 split_double_mode (mode, operands, 1, low, high);
22685 emit_insn (gen_shrd (low[0], high[0], operands[2]));
22686 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
22688 if (TARGET_CMOVE && scratch)
22690 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22691 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22693 ix86_expand_clear (scratch);
22694 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
22695 scratch));
22697 else
22699 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
22700 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
22702 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
22707 /* Predict just emitted jump instruction to be taken with probability PROB. */
22708 static void
22709 predict_jump (int prob)
22711 rtx insn = get_last_insn ();
22712 gcc_assert (JUMP_P (insn));
22713 add_int_reg_note (insn, REG_BR_PROB, prob);
22716 /* Helper function for the string operations below.  Test whether VARIABLE
22717 is aligned to VALUE bytes; if so, the emitted code jumps to the returned label. */
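/* For example, ix86_expand_aligntest (ptr, 2, false) emits the
   equivalent of

     if ((ptr & 2) == 0) goto label;

   and returns LABEL, so the fall-through code runs only when that
   alignment bit is set.  */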
22718 static rtx
22719 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
22721 rtx label = gen_label_rtx ();
22722 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
22723 if (GET_MODE (variable) == DImode)
22724 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
22725 else
22726 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
22727 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
22728 1, label);
22729 if (epilogue)
22730 predict_jump (REG_BR_PROB_BASE * 50 / 100);
22731 else
22732 predict_jump (REG_BR_PROB_BASE * 90 / 100);
22733 return label;
22736 /* Adjust COUNTER by the VALUE. */
22737 static void
22738 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
22740 rtx (*gen_add)(rtx, rtx, rtx)
22741 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
22743 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
22746 /* Zero extend possibly SImode EXP to Pmode register. */
22748 ix86_zero_extend_to_Pmode (rtx exp)
22750 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
22753 /* Divide COUNTREG by SCALE. */
22754 static rtx
22755 scale_counter (rtx countreg, int scale)
22757 rtx sc;
22759 if (scale == 1)
22760 return countreg;
22761 if (CONST_INT_P (countreg))
22762 return GEN_INT (INTVAL (countreg) / scale);
22763 gcc_assert (REG_P (countreg));
22765 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
22766 GEN_INT (exact_log2 (scale)),
22767 NULL, 1, OPTAB_DIRECT);
22768 return sc;
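/* [Editorial sketch, not part of GCC] For a power-of-two SCALE the division
   above is emitted as a logical shift right; the equivalent host-side
   arithmetic (illustrative name and types) is:  */
static unsigned long
scale_counter_sketch (unsigned long count, unsigned long scale)
{
  unsigned int shift = 0;
  while ((1UL << shift) < scale)    /* exact_log2 of a power of two  */
    shift++;
  return count >> shift;            /* count / scale                 */
}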
22771 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
22772 DImode for constant loop counts. */
22774 static enum machine_mode
22775 counter_mode (rtx count_exp)
22777 if (GET_MODE (count_exp) != VOIDmode)
22778 return GET_MODE (count_exp);
22779 if (!CONST_INT_P (count_exp))
22780 return Pmode;
22781 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
22782 return DImode;
22783 return SImode;
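/* [Editorial sketch, not part of GCC] The mode choice above amounts to:
   use a 32-bit counter unless a known constant count does not fit in 32 bits
   (illustrative helper, host-side types).  */
static int
counter_needs_64bit_sketch (unsigned long long count, int target_64bit)
{
  return target_64bit && (count & ~0xffffffffULL) != 0;
}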
22786 /* Copy the address to a Pmode register. This is used for x32 to
22787 truncate a DImode TLS address to a SImode register. */
22789 static rtx
22790 ix86_copy_addr_to_reg (rtx addr)
22792 if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode)
22793 return copy_addr_to_reg (addr);
22794 else
22796 gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
22797 return gen_rtx_SUBREG (SImode, copy_to_mode_reg (DImode, addr), 0);
22801 /* When ISSETMEM is FALSE, output a simple loop to move memory pointed to by
22802 SRCPTR to DESTPTR via chunks of MODE, unrolled UNROLL times; the overall size
22803 is COUNT, specified in bytes. When ISSETMEM is TRUE, output the equivalent
22804 loop to set memory by VALUE (expected to be in MODE).
22806 The size is rounded down to a whole number of chunks moved at once.
22807 SRCMEM and DESTMEM provide MEM rtx to feed proper aliasing info. */
22810 static void
22811 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
22812 rtx destptr, rtx srcptr, rtx value,
22813 rtx count, enum machine_mode mode, int unroll,
22814 int expected_size, bool issetmem)
22816 rtx out_label, top_label, iter, tmp;
22817 enum machine_mode iter_mode = counter_mode (count);
22818 int piece_size_n = GET_MODE_SIZE (mode) * unroll;
22819 rtx piece_size = GEN_INT (piece_size_n);
22820 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
22821 rtx size;
22822 int i;
22824 top_label = gen_label_rtx ();
22825 out_label = gen_label_rtx ();
22826 iter = gen_reg_rtx (iter_mode);
22828 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
22829 NULL, 1, OPTAB_DIRECT);
22830 /* Those two should combine. */
22831 if (piece_size == const1_rtx)
22833 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
22834 true, out_label);
22835 predict_jump (REG_BR_PROB_BASE * 10 / 100);
22837 emit_move_insn (iter, const0_rtx);
22839 emit_label (top_label);
22841 tmp = convert_modes (Pmode, iter_mode, iter, true);
22843 /* This assert could be relaxed - in that case we'll need to compute
22844 the smallest power of two containing PIECE_SIZE_N and pass it to
22845 offset_address. */
22846 gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
22847 destmem = offset_address (destmem, tmp, piece_size_n);
22848 destmem = adjust_address (destmem, mode, 0);
22850 if (!issetmem)
22852 srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
22853 srcmem = adjust_address (srcmem, mode, 0);
22855 /* When unrolling for chips that reorder memory reads and writes,
22856 we can save registers by using a single temporary.
22857 Also, using 4 temporaries is overkill in 32-bit mode. */
22858 if (!TARGET_64BIT && 0)
22860 for (i = 0; i < unroll; i++)
22862 if (i)
22864 destmem =
22865 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
22866 srcmem =
22867 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
22869 emit_move_insn (destmem, srcmem);
22872 else
22874 rtx tmpreg[4];
22875 gcc_assert (unroll <= 4);
22876 for (i = 0; i < unroll; i++)
22878 tmpreg[i] = gen_reg_rtx (mode);
22879 if (i)
22881 srcmem =
22882 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
22884 emit_move_insn (tmpreg[i], srcmem);
22886 for (i = 0; i < unroll; i++)
22888 if (i)
22890 destmem =
22891 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
22893 emit_move_insn (destmem, tmpreg[i]);
22897 else
22898 for (i = 0; i < unroll; i++)
22900 if (i)
22901 destmem =
22902 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
22903 emit_move_insn (destmem, value);
22906 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
22907 true, OPTAB_LIB_WIDEN);
22908 if (tmp != iter)
22909 emit_move_insn (iter, tmp);
22911 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
22912 true, top_label);
22913 if (expected_size != -1)
22915 expected_size /= GET_MODE_SIZE (mode) * unroll;
22916 if (expected_size == 0)
22917 predict_jump (0);
22918 else if (expected_size > REG_BR_PROB_BASE)
22919 predict_jump (REG_BR_PROB_BASE - 1);
22920 else
22921 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
22923 else
22924 predict_jump (REG_BR_PROB_BASE * 80 / 100);
22925 iter = ix86_zero_extend_to_Pmode (iter);
22926 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
22927 true, OPTAB_LIB_WIDEN);
22928 if (tmp != destptr)
22929 emit_move_insn (destptr, tmp);
22930 if (!issetmem)
22932 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
22933 true, OPTAB_LIB_WIDEN);
22934 if (tmp != srcptr)
22935 emit_move_insn (srcptr, tmp);
22937 emit_label (out_label);
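/* [Editorial sketch, not part of GCC] Shape of the copy loop emitted above for
   the !ISSETMEM case, with MODE taken as 'long' and UNROLL = 2.  The byte
   count is rounded down to whole unrolled chunks, loads go through temporaries
   before the stores, and the tail is left to the epilogue.  Names here are
   illustrative only.  */
static void
copy_loop_sketch (long *dst, const long *src, unsigned long count_bytes)
{
  const unsigned long piece = 2 * sizeof (long);      /* mode size * unroll  */
  unsigned long size = count_bytes & ~(piece - 1);    /* round down          */
  unsigned long iter;

  for (iter = 0; iter < size; iter += piece)
    {
      unsigned long w = iter / sizeof (long);
      long t0 = src[w], t1 = src[w + 1];              /* read into temps     */
      dst[w] = t0;
      dst[w + 1] = t1;
    }
  /* DESTPTR, SRCPTR and the remaining count are then advanced by SIZE.  */
}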
22940 /* Output a "rep; mov" or "rep; stos" instruction depending on the ISSETMEM argument.
22941 When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
22942 When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
22943 For the setmem case, VALUE is ORIG_VALUE promoted to a wider size.
22944 ORIG_VALUE is the original value passed to memset to fill the memory with.
22945 Other arguments have the same meaning as for the previous function. */
22947 static void
22948 expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem,
22949 rtx destptr, rtx srcptr, rtx value, rtx orig_value,
22950 rtx count,
22951 enum machine_mode mode, bool issetmem)
22953 rtx destexp;
22954 rtx srcexp;
22955 rtx countreg;
22956 HOST_WIDE_INT rounded_count;
22958 /* If possible, it is shorter to use rep movs.
22959 TODO: Maybe it is better to move this logic to decide_alg. */
22960 if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3)
22961 && (!issetmem || orig_value == const0_rtx))
22962 mode = SImode;
22964 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
22965 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
22967 countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
22968 GET_MODE_SIZE (mode)));
22969 if (mode != QImode)
22971 destexp = gen_rtx_ASHIFT (Pmode, countreg,
22972 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
22973 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
22975 else
22976 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
22977 if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count))
22979 rounded_count = (INTVAL (count)
22980 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
22981 destmem = shallow_copy_rtx (destmem);
22982 set_mem_size (destmem, rounded_count);
22984 else if (MEM_SIZE_KNOWN_P (destmem))
22985 clear_mem_size (destmem);
22987 if (issetmem)
22989 value = force_reg (mode, gen_lowpart (mode, value));
22990 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
22992 else
22994 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
22995 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
22996 if (mode != QImode)
22998 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
22999 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23000 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
23002 else
23003 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
23004 if (CONST_INT_P (count))
23006 rounded_count = (INTVAL (count)
23007 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23008 srcmem = shallow_copy_rtx (srcmem);
23009 set_mem_size (srcmem, rounded_count);
23011 else
23013 if (MEM_SIZE_KNOWN_P (srcmem))
23014 clear_mem_size (srcmem);
23016 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
23017 destexp, srcexp));
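/* [Editorial sketch, not part of GCC] The "rep; movs" expansion above, for
   MODE = SImode, is equivalent to the following host-side loop; the byte count
   is pre-scaled by scale_counter and any count % 4 tail bytes are handled by
   the epilogue (illustrative names).  */
static void
rep_movsd_sketch (unsigned char *dst, const unsigned char *src,
                  unsigned long count_bytes)
{
  unsigned long n = count_bytes / 4;   /* scale_counter (count, 4)  */
  while (n--)
    {
      dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3];
      dst += 4;
      src += 4;
    }
}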
23021 /* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
23022 DESTMEM.
23023 SRCMEM is passed by pointer so it can be updated on return.
23024 The return value is the updated DESTMEM. */
23025 static rtx
23026 emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
23027 HOST_WIDE_INT size_to_move)
23029 rtx dst = destmem, src = *srcmem, adjust, tempreg;
23030 enum insn_code code;
23031 enum machine_mode move_mode;
23032 int piece_size, i;
23034 /* Find the widest mode in which we could perform moves.
23035 Start with the biggest power of 2 not larger than SIZE_TO_MOVE and halve
23036 it until a move of that size is supported. */
23037 piece_size = 1 << floor_log2 (size_to_move);
23038 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23039 code = optab_handler (mov_optab, move_mode);
23040 while (code == CODE_FOR_nothing && piece_size > 1)
23042 piece_size >>= 1;
23043 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23044 code = optab_handler (mov_optab, move_mode);
23047 /* Find the corresponding vector mode with the same size as MOVE_MODE.
23048 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
23049 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
23051 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
23052 move_mode = mode_for_vector (word_mode, nunits);
23053 code = optab_handler (mov_optab, move_mode);
23054 if (code == CODE_FOR_nothing)
23056 move_mode = word_mode;
23057 piece_size = GET_MODE_SIZE (move_mode);
23058 code = optab_handler (mov_optab, move_mode);
23061 gcc_assert (code != CODE_FOR_nothing);
23063 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23064 src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);
23066 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZES moves. */
23067 gcc_assert (size_to_move % piece_size == 0);
23068 adjust = GEN_INT (piece_size);
23069 for (i = 0; i < size_to_move; i += piece_size)
23071 /* We move from memory to memory, so we'll need to do it via
23072 a temporary register. */
23073 tempreg = gen_reg_rtx (move_mode);
23074 emit_insn (GEN_FCN (code) (tempreg, src));
23075 emit_insn (GEN_FCN (code) (dst, tempreg));
23077 emit_move_insn (destptr,
23078 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23079 emit_move_insn (srcptr,
23080 gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));
23082 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23083 piece_size);
23084 src = adjust_automodify_address_nv (src, move_mode, srcptr,
23085 piece_size);
23088 /* Update DST and SRC rtx. */
23089 *srcmem = src;
23090 return dst;
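/* [Editorial sketch, not part of GCC] Choosing the piece size the way the code
   above does: start from the largest power of two not exceeding SIZE_TO_MOVE
   and halve it until a move of that size is supported (here "supported" is
   approximated by a simple cap; names are illustrative).  */
static int
pick_piece_size_sketch (int size_to_move, int max_supported)
{
  int piece = 1;
  while (piece * 2 <= size_to_move)    /* 1 << floor_log2 (size_to_move)  */
    piece *= 2;
  while (piece > max_supported)        /* halve until supported           */
    piece /= 2;
  return piece;
}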
23093 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
23094 static void
23095 expand_movmem_epilogue (rtx destmem, rtx srcmem,
23096 rtx destptr, rtx srcptr, rtx count, int max_size)
23098 rtx src, dest;
23099 if (CONST_INT_P (count))
23101 HOST_WIDE_INT countval = INTVAL (count);
23102 HOST_WIDE_INT epilogue_size = countval % max_size;
23103 int i;
23105 /* For now MAX_SIZE should be a power of 2. This assert could be
23106 relaxed, but it'll require a bit more complicated epilogue
23107 expanding. */
23108 gcc_assert ((max_size & (max_size - 1)) == 0);
23109 for (i = max_size; i >= 1; i >>= 1)
23111 if (epilogue_size & i)
23112 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23114 return;
23116 if (max_size > 8)
23118 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
23119 count, 1, OPTAB_DIRECT);
23120 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
23121 count, QImode, 1, 4, false);
23122 return;
23125 /* When single stringop instructions are available, they cheaply advance the
23126 dest and src pointers. Otherwise we save code size by maintaining an offset
23127 (zero is available from the preceding rep op) and using x86 addressing modes. */
23129 if (TARGET_SINGLE_STRINGOP)
23131 if (max_size > 4)
23133 rtx label = ix86_expand_aligntest (count, 4, true);
23134 src = change_address (srcmem, SImode, srcptr);
23135 dest = change_address (destmem, SImode, destptr);
23136 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23137 emit_label (label);
23138 LABEL_NUSES (label) = 1;
23140 if (max_size > 2)
23142 rtx label = ix86_expand_aligntest (count, 2, true);
23143 src = change_address (srcmem, HImode, srcptr);
23144 dest = change_address (destmem, HImode, destptr);
23145 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23146 emit_label (label);
23147 LABEL_NUSES (label) = 1;
23149 if (max_size > 1)
23151 rtx label = ix86_expand_aligntest (count, 1, true);
23152 src = change_address (srcmem, QImode, srcptr);
23153 dest = change_address (destmem, QImode, destptr);
23154 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23155 emit_label (label);
23156 LABEL_NUSES (label) = 1;
23159 else
23161 rtx offset = force_reg (Pmode, const0_rtx);
23162 rtx tmp;
23164 if (max_size > 4)
23166 rtx label = ix86_expand_aligntest (count, 4, true);
23167 src = change_address (srcmem, SImode, srcptr);
23168 dest = change_address (destmem, SImode, destptr);
23169 emit_move_insn (dest, src);
23170 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
23171 true, OPTAB_LIB_WIDEN);
23172 if (tmp != offset)
23173 emit_move_insn (offset, tmp);
23174 emit_label (label);
23175 LABEL_NUSES (label) = 1;
23177 if (max_size > 2)
23179 rtx label = ix86_expand_aligntest (count, 2, true);
23180 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23181 src = change_address (srcmem, HImode, tmp);
23182 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23183 dest = change_address (destmem, HImode, tmp);
23184 emit_move_insn (dest, src);
23185 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
23186 true, OPTAB_LIB_WIDEN);
23187 if (tmp != offset)
23188 emit_move_insn (offset, tmp);
23189 emit_label (label);
23190 LABEL_NUSES (label) = 1;
23192 if (max_size > 1)
23194 rtx label = ix86_expand_aligntest (count, 1, true);
23195 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23196 src = change_address (srcmem, QImode, tmp);
23197 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23198 dest = change_address (destmem, QImode, tmp);
23199 emit_move_insn (dest, src);
23200 emit_label (label);
23201 LABEL_NUSES (label) = 1;
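/* [Editorial sketch, not part of GCC] For a known COUNT the epilogue above
   decomposes COUNT % MAX_SIZE into power-of-two pieces, largest first, and
   emits one move per set bit (illustrative host-side version).  */
static void
movmem_epilogue_sketch (unsigned char *dst, const unsigned char *src,
                        unsigned long count, unsigned long max_size)
{
  unsigned long rest = count % max_size;   /* max_size is a power of two  */
  unsigned long i, b;

  for (i = max_size; i >= 1; i >>= 1)
    if (rest & i)
      {
        for (b = 0; b < i; b++)
          dst[b] = src[b];
        dst += i;
        src += i;
      }
}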
23206 /* This function emits moves to fill SIZE_TO_MOVE bytes starting from DESTMEM
23207 with value PROMOTED_VAL.
23208 The return value is the updated DESTMEM. */
23210 static rtx
23211 emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
23212 HOST_WIDE_INT size_to_move)
23214 rtx dst = destmem, adjust;
23215 enum insn_code code;
23216 enum machine_mode move_mode;
23217 int piece_size, i;
23219 /* Perform the stores in the mode of PROMOTED_VAL, narrowing it when
23220 SIZE_TO_MOVE is smaller than that mode, so that a store of the requested
23221 size is always available. */
23222 move_mode = GET_MODE (promoted_val);
23223 if (move_mode == VOIDmode)
23224 move_mode = QImode;
23225 if (size_to_move < GET_MODE_SIZE (move_mode))
23227 move_mode = mode_for_size (size_to_move * BITS_PER_UNIT, MODE_INT, 0);
23228 promoted_val = gen_lowpart (move_mode, promoted_val);
23230 piece_size = GET_MODE_SIZE (move_mode);
23231 code = optab_handler (mov_optab, move_mode);
23232 gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX);
23234 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23236 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZES moves. */
23237 gcc_assert (size_to_move % piece_size == 0);
23238 adjust = GEN_INT (piece_size);
23239 for (i = 0; i < size_to_move; i += piece_size)
23241 if (piece_size <= GET_MODE_SIZE (word_mode))
23243 emit_insn (gen_strset (destptr, dst, promoted_val));
23244 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23245 piece_size);
23246 continue;
23249 emit_insn (GEN_FCN (code) (dst, promoted_val));
23251 emit_move_insn (destptr,
23252 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23254 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23255 piece_size);
23258 /* Update DST rtx. */
23259 return dst;
23261 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
23262 static void
23263 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
23264 rtx count, int max_size)
23266 count =
23267 expand_simple_binop (counter_mode (count), AND, count,
23268 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
23269 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
23270 gen_lowpart (QImode, value), count, QImode,
23271 1, max_size / 2, true);
23274 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
23275 static void
23276 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
23277 rtx count, int max_size)
23279 rtx dest;
23281 if (CONST_INT_P (count))
23283 HOST_WIDE_INT countval = INTVAL (count);
23284 HOST_WIDE_INT epilogue_size = countval % max_size;
23285 int i;
23287 /* For now MAX_SIZE should be a power of 2. This assert could be
23288 relaxed, but it'll require a bit more complicated epilogue
23289 expanding. */
23290 gcc_assert ((max_size & (max_size - 1)) == 0);
23291 for (i = max_size; i >= 1; i >>= 1)
23293 if (epilogue_size & i)
23295 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23296 destmem = emit_memset (destmem, destptr, vec_value, i);
23297 else
23298 destmem = emit_memset (destmem, destptr, value, i);
23301 return;
23303 if (max_size > 32)
23305 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
23306 return;
23308 if (max_size > 16)
23310 rtx label = ix86_expand_aligntest (count, 16, true);
23311 if (TARGET_64BIT)
23313 dest = change_address (destmem, DImode, destptr);
23314 emit_insn (gen_strset (destptr, dest, value));
23315 dest = adjust_automodify_address_nv (dest, DImode, destptr, 8);
23316 emit_insn (gen_strset (destptr, dest, value));
23318 else
23320 dest = change_address (destmem, SImode, destptr);
23321 emit_insn (gen_strset (destptr, dest, value));
23322 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23323 emit_insn (gen_strset (destptr, dest, value));
23324 dest = adjust_automodify_address_nv (dest, SImode, destptr, 8);
23325 emit_insn (gen_strset (destptr, dest, value));
23326 dest = adjust_automodify_address_nv (dest, SImode, destptr, 12);
23327 emit_insn (gen_strset (destptr, dest, value));
23329 emit_label (label);
23330 LABEL_NUSES (label) = 1;
23332 if (max_size > 8)
23334 rtx label = ix86_expand_aligntest (count, 8, true);
23335 if (TARGET_64BIT)
23337 dest = change_address (destmem, DImode, destptr);
23338 emit_insn (gen_strset (destptr, dest, value));
23340 else
23342 dest = change_address (destmem, SImode, destptr);
23343 emit_insn (gen_strset (destptr, dest, value));
23344 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23345 emit_insn (gen_strset (destptr, dest, value));
23347 emit_label (label);
23348 LABEL_NUSES (label) = 1;
23350 if (max_size > 4)
23352 rtx label = ix86_expand_aligntest (count, 4, true);
23353 dest = change_address (destmem, SImode, destptr);
23354 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
23355 emit_label (label);
23356 LABEL_NUSES (label) = 1;
23358 if (max_size > 2)
23360 rtx label = ix86_expand_aligntest (count, 2, true);
23361 dest = change_address (destmem, HImode, destptr);
23362 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
23363 emit_label (label);
23364 LABEL_NUSES (label) = 1;
23366 if (max_size > 1)
23368 rtx label = ix86_expand_aligntest (count, 1, true);
23369 dest = change_address (destmem, QImode, destptr);
23370 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
23371 emit_label (label);
23372 LABEL_NUSES (label) = 1;
23376 /* Depending on ISSETMEM, either copy enough bytes from SRCMEM to DESTMEM, or
23377 store enough of VALUE to DESTMEM, to align it to DESIRED_ALIGNMENT. The
23378 original alignment is ALIGN. Depending on ISSETMEM, either arguments
23379 SRCMEM/SRCPTR or VALUE/VEC_VALUE are ignored.
23380 The return value is the updated DESTMEM. */
23381 static rtx
23382 expand_set_or_movmem_prologue (rtx destmem, rtx srcmem,
23383 rtx destptr, rtx srcptr, rtx value,
23384 rtx vec_value, rtx count, int align,
23385 int desired_alignment, bool issetmem)
23387 int i;
23388 for (i = 1; i < desired_alignment; i <<= 1)
23390 if (align <= i)
23392 rtx label = ix86_expand_aligntest (destptr, i, false);
23393 if (issetmem)
23395 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23396 destmem = emit_memset (destmem, destptr, vec_value, i);
23397 else
23398 destmem = emit_memset (destmem, destptr, value, i);
23400 else
23401 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23402 ix86_adjust_counter (count, i);
23403 emit_label (label);
23404 LABEL_NUSES (label) = 1;
23405 set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
23408 return destmem;
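/* [Editorial sketch, not part of GCC] The prologue above aligns the
   destination by conditionally copying 1, 2, 4, ... bytes, each copy guarded
   by the corresponding low bit of the destination address, until
   DESIRED_ALIGNMENT is reached (illustrative host-side version of the copy
   case).  */
static unsigned char *
align_prologue_sketch (unsigned char *dst, const unsigned char **src,
                       unsigned long *count, unsigned long desired_align)
{
  unsigned long i, b;

  for (i = 1; i < desired_align; i <<= 1)
    if ((unsigned long) dst & i)        /* ix86_expand_aligntest on DESTPTR  */
      {
        for (b = 0; b < i; b++)
          dst[b] = (*src)[b];
        dst += i;
        *src += i;
        *count -= i;                    /* ix86_adjust_counter  */
      }
  return dst;
}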
23411 /* Test if COUNT & SIZE is nonzero and if so, expand a movmem
23412 or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
23413 and jump to DONE_LABEL. */
23414 static void
23415 expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
23416 rtx destptr, rtx srcptr,
23417 rtx value, rtx vec_value,
23418 rtx count, int size,
23419 rtx done_label, bool issetmem)
23421 rtx label = ix86_expand_aligntest (count, size, false);
23422 enum machine_mode mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 1);
23423 rtx modesize;
23424 int n;
23426 /* If we do not have a vector value to copy, we must reduce the mode size. */
23427 if (issetmem)
23429 if (!vec_value)
23431 if (GET_MODE (value) == VOIDmode && size > 8)
23432 mode = Pmode;
23433 else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value)))
23434 mode = GET_MODE (value);
23436 else
23437 mode = GET_MODE (vec_value), value = vec_value;
23439 else
23441 /* Choose appropriate vector mode. */
23442 if (size >= 32)
23443 mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
23444 else if (size >= 16)
23445 mode = TARGET_SSE ? V16QImode : DImode;
23446 srcmem = change_address (srcmem, mode, srcptr);
23448 destmem = change_address (destmem, mode, destptr);
23449 modesize = GEN_INT (GET_MODE_SIZE (mode));
23450 gcc_assert (GET_MODE_SIZE (mode) <= size);
23451 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
23453 if (issetmem)
23454 emit_move_insn (destmem, gen_lowpart (mode, value));
23455 else
23457 emit_move_insn (destmem, srcmem);
23458 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23460 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23463 destmem = offset_address (destmem, count, 1);
23464 destmem = offset_address (destmem, GEN_INT (-2 * size),
23465 GET_MODE_SIZE (mode));
23466 if (!issetmem)
23468 srcmem = offset_address (srcmem, count, 1);
23469 srcmem = offset_address (srcmem, GEN_INT (-2 * size),
23470 GET_MODE_SIZE (mode));
23472 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
23474 if (issetmem)
23475 emit_move_insn (destmem, gen_lowpart (mode, value));
23476 else
23478 emit_move_insn (destmem, srcmem);
23479 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23481 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23483 emit_jump_insn (gen_jump (done_label));
23484 emit_barrier ();
23486 emit_label (label);
23487 LABEL_NUSES (label) = 1;
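/* [Editorial sketch, not part of GCC] The overlap trick used above: for a
   block of N bytes with SIZE <= N <= 2*SIZE-1, copying the first SIZE bytes
   and the last SIZE bytes (the two ranges may overlap) covers the whole block
   with just two possibly misaligned moves (illustrative names).  */
static void
overlap_copy_sketch (unsigned char *dst, const unsigned char *src,
                     unsigned long n, unsigned long size)
{
  unsigned long b;

  for (b = 0; b < size; b++)                 /* first SIZE bytes             */
    dst[b] = src[b];
  for (b = 0; b < size; b++)                 /* last SIZE bytes, may overlap */
    dst[n - size + b] = src[n - size + b];
}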
23490 /* Handle a small memcpy (up to SIZE, which is supposed to be a small power of 2)
23491 and get ready for the main memcpy loop by copying the initial DESIRED_ALIGN-ALIGN
23492 bytes and the last SIZE bytes, adjusting DESTPTR/SRCPTR/COUNT so that we can
23493 proceed with a loop copying SIZE bytes at once. Do moves in MODE.
23494 DONE_LABEL is a label after the whole copying sequence. The label is created
23495 on demand if *DONE_LABEL is NULL.
23496 MIN_SIZE is the minimal size of the copied block. This value gets adjusted for
23497 the new bounds after the initial copies.
23499 DESTMEM/SRCMEM are memory expressions pointing to the copied block,
23500 DESTPTR/SRCPTR are pointers to the block. DYNAMIC_CHECK indicates whether
23501 we will dispatch to a library call for large blocks.
23503 In pseudocode we do:
23505 if (COUNT < SIZE)
23507 Assume that SIZE is 4. Bigger sizes are handled analogously
23508 if (COUNT & 4)
23510 copy 4 bytes from SRCPTR to DESTPTR
23511 copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
23512 goto done_label
23514 if (!COUNT)
23515 goto done_label;
23516 copy 1 byte from SRCPTR to DESTPTR
23517 if (COUNT & 2)
23519 copy 2 bytes from SRCPTR to DESTPTR
23520 copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
23523 else
23525 copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
23526 copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT -SIZE
23528 OLD_DESTPTR = DESTPTR;
23529 Align DESTPTR up to DESIRED_ALIGN
23530 SRCPTR += DESTPTR - OLD_DESTPTR
23531 COUNT -= DEST_PTR - OLD_DESTPTR
23532 if (DYNAMIC_CHECK)
23533 Round COUNT down to multiple of SIZE
23534 << optional caller supplied zero size guard is here >>
23535 << optional caller supplied dynamic check is here >>
23536 << caller supplied main copy loop is here >>
23538 done_label:
23540 static void
23541 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem,
23542 rtx *destptr, rtx *srcptr,
23543 enum machine_mode mode,
23544 rtx value, rtx vec_value,
23545 rtx *count,
23546 rtx *done_label,
23547 int size,
23548 int desired_align,
23549 int align,
23550 unsigned HOST_WIDE_INT *min_size,
23551 bool dynamic_check,
23552 bool issetmem)
23554 rtx loop_label = NULL, label;
23555 int n;
23556 rtx modesize;
23557 int prolog_size = 0;
23558 rtx mode_value;
23560 /* Choose the proper value to copy. */
23561 if (issetmem && VECTOR_MODE_P (mode))
23562 mode_value = vec_value;
23563 else
23564 mode_value = value;
23565 gcc_assert (GET_MODE_SIZE (mode) <= size);
23567 /* See if block is big or small, handle small blocks. */
23568 if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size)
23570 int size2 = size;
23571 loop_label = gen_label_rtx ();
23573 if (!*done_label)
23574 *done_label = gen_label_rtx ();
23576 emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count),
23577 1, loop_label);
23578 size2 >>= 1;
23580 /* Handle sizes > 3. */
23581 for (;size2 > 2; size2 >>= 1)
23582 expand_small_movmem_or_setmem (destmem, srcmem,
23583 *destptr, *srcptr,
23584 value, vec_value,
23585 *count,
23586 size2, *done_label, issetmem);
23587 /* Nothing to copy? Jump to DONE_LABEL if so */
23588 emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count),
23589 1, *done_label);
23591 /* Do a byte copy. */
23592 destmem = change_address (destmem, QImode, *destptr);
23593 if (issetmem)
23594 emit_move_insn (destmem, gen_lowpart (QImode, value));
23595 else
23597 srcmem = change_address (srcmem, QImode, *srcptr);
23598 emit_move_insn (destmem, srcmem);
23601 /* Handle sizes 2 and 3. */
23602 label = ix86_expand_aligntest (*count, 2, false);
23603 destmem = change_address (destmem, HImode, *destptr);
23604 destmem = offset_address (destmem, *count, 1);
23605 destmem = offset_address (destmem, GEN_INT (-2), 2);
23606 if (issetmem)
23607 emit_move_insn (destmem, gen_lowpart (HImode, value));
23608 else
23610 srcmem = change_address (srcmem, HImode, *srcptr);
23611 srcmem = offset_address (srcmem, *count, 1);
23612 srcmem = offset_address (srcmem, GEN_INT (-2), 2);
23613 emit_move_insn (destmem, srcmem);
23616 emit_label (label);
23617 LABEL_NUSES (label) = 1;
23618 emit_jump_insn (gen_jump (*done_label));
23619 emit_barrier ();
23621 else
23622 gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size
23623 || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size);
23625 /* Start memcpy for COUNT >= SIZE. */
23626 if (loop_label)
23628 emit_label (loop_label);
23629 LABEL_NUSES (loop_label) = 1;
23632 /* Copy first desired_align bytes. */
23633 if (!issetmem)
23634 srcmem = change_address (srcmem, mode, *srcptr);
23635 destmem = change_address (destmem, mode, *destptr);
23636 modesize = GEN_INT (GET_MODE_SIZE (mode));
23637 for (n = 0; prolog_size < desired_align - align; n++)
23639 if (issetmem)
23640 emit_move_insn (destmem, mode_value);
23641 else
23643 emit_move_insn (destmem, srcmem);
23644 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23646 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23647 prolog_size += GET_MODE_SIZE (mode);
23651 /* Copy last SIZE bytes. */
23652 destmem = offset_address (destmem, *count, 1);
23653 destmem = offset_address (destmem,
23654 GEN_INT (-size - prolog_size),
23656 if (issetmem)
23657 emit_move_insn (destmem, mode_value);
23658 else
23660 srcmem = offset_address (srcmem, *count, 1);
23661 srcmem = offset_address (srcmem,
23662 GEN_INT (-size - prolog_size),
23664 emit_move_insn (destmem, srcmem);
23666 for (n = 1; n * GET_MODE_SIZE (mode) < size; n++)
23668 destmem = offset_address (destmem, modesize, 1);
23669 if (issetmem)
23670 emit_move_insn (destmem, mode_value);
23671 else
23673 srcmem = offset_address (srcmem, modesize, 1);
23674 emit_move_insn (destmem, srcmem);
23678 /* Align destination. */
23679 if (desired_align > 1 && desired_align > align)
23681 rtx saveddest = *destptr;
23683 gcc_assert (desired_align <= size);
23684 /* Align destptr up, placing it in a new register. */
23685 *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
23686 GEN_INT (prolog_size),
23687 NULL_RTX, 1, OPTAB_DIRECT);
23688 *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
23689 GEN_INT (-desired_align),
23690 *destptr, 1, OPTAB_DIRECT);
23691 /* See how many bytes we skipped. */
23692 saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
23693 *destptr,
23694 saveddest, 1, OPTAB_DIRECT);
23695 /* Adjust srcptr and count. */
23696 if (!issetmem)
23697 *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr, saveddest,
23698 *srcptr, 1, OPTAB_DIRECT);
23699 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
23700 saveddest, *count, 1, OPTAB_DIRECT);
23701 /* We copied at most size + prolog_size. */
23702 if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size))
23703 *min_size = (*min_size - size) & ~(unsigned HOST_WIDE_INT)(size - 1);
23704 else
23705 *min_size = 0;
23707 /* Our loops always round down the block size, but for dispatch to a library
23708 call we need the precise value. */
23709 if (dynamic_check)
23710 *count = expand_simple_binop (GET_MODE (*count), AND, *count,
23711 GEN_INT (-size), *count, 1, OPTAB_DIRECT);
23713 else
23715 gcc_assert (prolog_size == 0);
23716 /* Decrease the count, so we won't end up copying the last word twice. */
23717 if (!CONST_INT_P (*count))
23718 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
23719 constm1_rtx, *count, 1, OPTAB_DIRECT);
23720 else
23721 *count = GEN_INT ((UINTVAL (*count) - 1) & ~(unsigned HOST_WIDE_INT)(size - 1));
23722 if (*min_size)
23723 *min_size = (*min_size - 1) & ~(unsigned HOST_WIDE_INT)(size - 1);
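/* [Editorial sketch, not part of GCC] Destination alignment as performed
   above: round DESTPTR up to DESIRED_ALIGN and shift SRCPTR and COUNT by the
   same number of bytes, which the misaligned prologue has already copied
   (illustrative host-side version).  */
static void
align_up_sketch (unsigned char **dst, const unsigned char **src,
                 unsigned long *count, unsigned long desired_align)
{
  unsigned long old_addr = (unsigned long) *dst;
  unsigned long new_addr = (old_addr + desired_align - 1) & -desired_align;
  unsigned long skipped = new_addr - old_addr;

  *dst = (unsigned char *) new_addr;
  *src += skipped;
  *count -= skipped;
}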
23728 /* This function is like the previous one, except here we know how many bytes
23729 need to be copied. That allows us to update alignment not only of DST, which
23730 is returned, but also of SRC, which is passed as a pointer for that
23731 reason. */
23732 static rtx
23733 expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
23734 rtx srcreg, rtx value, rtx vec_value,
23735 int desired_align, int align_bytes,
23736 bool issetmem)
23738 rtx src = NULL;
23739 rtx orig_dst = dst;
23740 rtx orig_src = NULL;
23741 int piece_size = 1;
23742 int copied_bytes = 0;
23744 if (!issetmem)
23746 gcc_assert (srcp != NULL);
23747 src = *srcp;
23748 orig_src = src;
23751 for (piece_size = 1;
23752 piece_size <= desired_align && copied_bytes < align_bytes;
23753 piece_size <<= 1)
23755 if (align_bytes & piece_size)
23757 if (issetmem)
23759 if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value)))
23760 dst = emit_memset (dst, destreg, vec_value, piece_size);
23761 else
23762 dst = emit_memset (dst, destreg, value, piece_size);
23764 else
23765 dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
23766 copied_bytes += piece_size;
23769 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
23770 set_mem_align (dst, desired_align * BITS_PER_UNIT);
23771 if (MEM_SIZE_KNOWN_P (orig_dst))
23772 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
23774 if (!issetmem)
23776 int src_align_bytes = get_mem_align_offset (src, desired_align
23777 * BITS_PER_UNIT);
23778 if (src_align_bytes >= 0)
23779 src_align_bytes = desired_align - src_align_bytes;
23780 if (src_align_bytes >= 0)
23782 unsigned int src_align;
23783 for (src_align = desired_align; src_align >= 2; src_align >>= 1)
23785 if ((src_align_bytes & (src_align - 1))
23786 == (align_bytes & (src_align - 1)))
23787 break;
23789 if (src_align > (unsigned int) desired_align)
23790 src_align = desired_align;
23791 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
23792 set_mem_align (src, src_align * BITS_PER_UNIT);
23794 if (MEM_SIZE_KNOWN_P (orig_src))
23795 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
23796 *srcp = src;
23799 return dst;
23802 /* Return true if ALG can be used in current context.
23803 Assume we expand memset if MEMSET is true. */
23804 static bool
23805 alg_usable_p (enum stringop_alg alg, bool memset)
23807 if (alg == no_stringop)
23808 return false;
23809 if (alg == vector_loop)
23810 return TARGET_SSE || TARGET_AVX;
23811 /* Algorithms using the rep prefix want at least edi and ecx;
23812 additionally, memset wants eax and memcpy wants esi. Don't
23813 consider such algorithms if the user has appropriated those
23814 registers for their own purposes. */
23815 if (alg == rep_prefix_1_byte
23816 || alg == rep_prefix_4_byte
23817 || alg == rep_prefix_8_byte)
23818 return !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
23819 || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
23820 return true;
23823 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
23824 static enum stringop_alg
23825 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
23826 unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
23827 bool memset, bool zero_memset, int *dynamic_check, bool *noalign)
23829 const struct stringop_algs * algs;
23830 bool optimize_for_speed;
23831 int max = -1;
23832 const struct processor_costs *cost;
23833 int i;
23834 bool any_alg_usable_p = false;
23836 *noalign = false;
23837 *dynamic_check = -1;
23839 /* Even if the string operation call is cold, we still might spend a lot
23840 of time processing large blocks. */
23841 if (optimize_function_for_size_p (cfun)
23842 || (optimize_insn_for_size_p ()
23843 && (max_size < 256
23844 || (expected_size != -1 && expected_size < 256))))
23845 optimize_for_speed = false;
23846 else
23847 optimize_for_speed = true;
23849 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
23850 if (memset)
23851 algs = &cost->memset[TARGET_64BIT != 0];
23852 else
23853 algs = &cost->memcpy[TARGET_64BIT != 0];
23855 /* Find the maximal size for which an inline (non-libcall) algorithm is defined. */
23856 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
23858 enum stringop_alg candidate = algs->size[i].alg;
23859 bool usable = alg_usable_p (candidate, memset);
23860 any_alg_usable_p |= usable;
23862 if (candidate != libcall && candidate && usable)
23863 max = algs->size[i].max;
23866 /* If the expected size is not known but the max size is small enough
23867 that the inline version is a win, set the expected size into
23868 the range. */
23869 if (max > 1 && (unsigned HOST_WIDE_INT) max >= max_size
23870 && expected_size == -1)
23871 expected_size = min_size / 2 + max_size / 2;
23873 /* If the user specified the algorithm, honor it if possible. */
23874 if (ix86_stringop_alg != no_stringop
23875 && alg_usable_p (ix86_stringop_alg, memset))
23876 return ix86_stringop_alg;
23877 /* rep; movq or rep; movl is the smallest variant. */
23878 else if (!optimize_for_speed)
23880 *noalign = true;
23881 if (!count || (count & 3) || (memset && !zero_memset))
23882 return alg_usable_p (rep_prefix_1_byte, memset)
23883 ? rep_prefix_1_byte : loop_1_byte;
23884 else
23885 return alg_usable_p (rep_prefix_4_byte, memset)
23886 ? rep_prefix_4_byte : loop;
23888 /* Very tiny blocks are best handled via the loop; REP is expensive to
23889 set up. */
23890 else if (expected_size != -1 && expected_size < 4)
23891 return loop_1_byte;
23892 else if (expected_size != -1)
23894 enum stringop_alg alg = libcall;
23895 bool alg_noalign = false;
23896 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
23898 /* We get here if the algorithms that were not libcall-based
23899 were rep-prefix based and we are unable to use rep prefixes
23900 based on global register usage. Break out of the loop and
23901 use the heuristic below. */
23902 if (algs->size[i].max == 0)
23903 break;
23904 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
23906 enum stringop_alg candidate = algs->size[i].alg;
23908 if (candidate != libcall && alg_usable_p (candidate, memset))
23910 alg = candidate;
23911 alg_noalign = algs->size[i].noalign;
23913 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
23914 last non-libcall inline algorithm. */
23915 if (TARGET_INLINE_ALL_STRINGOPS)
23917 /* When the current size is best to be copied by a libcall,
23918 but we are still forced to inline, run the heuristic below
23919 that will pick code for medium sized blocks. */
23920 if (alg != libcall)
23922 *noalign = alg_noalign;
23923 return alg;
23925 break;
23927 else if (alg_usable_p (candidate, memset))
23929 *noalign = algs->size[i].noalign;
23930 return candidate;
23935 /* When asked to inline the call anyway, try to pick a meaningful choice.
23936 We look for the maximal size of block that is faster to copy by hand and
23937 take blocks of at most that size, guessing that the average size will
23938 be roughly half of that maximum.
23940 If this turns out to be bad, we might simply specify the preferred
23941 choice in ix86_costs. */
23942 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
23943 && (algs->unknown_size == libcall
23944 || !alg_usable_p (algs->unknown_size, memset)))
23946 enum stringop_alg alg;
23948 /* If there aren't any usable algorithms, then recursing on
23949 smaller sizes isn't going to find anything. Just return the
23950 simple byte-at-a-time copy loop. */
23951 if (!any_alg_usable_p)
23953 /* Pick something reasonable. */
23954 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
23955 *dynamic_check = 128;
23956 return loop_1_byte;
23958 if (max == -1)
23959 max = 4096;
23960 alg = decide_alg (count, max / 2, min_size, max_size, memset,
23961 zero_memset, dynamic_check, noalign);
23962 gcc_assert (*dynamic_check == -1);
23963 gcc_assert (alg != libcall);
23964 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
23965 *dynamic_check = max;
23966 return alg;
23968 return (alg_usable_p (algs->unknown_size, memset)
23969 ? algs->unknown_size : libcall);
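/* [Editorial sketch, not part of GCC] The core of the table walk above: pick
   the first entry whose MAX covers the expected size, where -1 means
   "unbounded"; otherwise fall back.  Structure and names are illustrative
   only.  */
struct alg_entry_sketch { long max; int alg; };

static int
pick_alg_sketch (const struct alg_entry_sketch *table, int n,
                 long expected_size, int fallback)
{
  int i;
  for (i = 0; i < n; i++)
    if (table[i].max == -1 || table[i].max >= expected_size)
      return table[i].alg;
  return fallback;
}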
23972 /* Decide on alignment. We know that the operand is already aligned to ALIGN
23973 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
23974 static int
23975 decide_alignment (int align,
23976 enum stringop_alg alg,
23977 int expected_size,
23978 enum machine_mode move_mode)
23980 int desired_align = 0;
23982 gcc_assert (alg != no_stringop);
23984 if (alg == libcall)
23985 return 0;
23986 if (move_mode == VOIDmode)
23987 return 0;
23989 desired_align = GET_MODE_SIZE (move_mode);
23990 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
23991 copying a whole cache line at once. */
23992 if (TARGET_PENTIUMPRO
23993 && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
23994 desired_align = 8;
23996 if (optimize_size)
23997 desired_align = 1;
23998 if (desired_align < align)
23999 desired_align = align;
24000 if (expected_size != -1 && expected_size < 4)
24001 desired_align = align;
24003 return desired_align;
24007 /* Helper function for memset. For a QImode value 0xXY produce
24008 0xXYXYXYXY of the width specified by MODE. This is essentially
24009 a * 0x01010101, but we can do slightly better than
24010 synth_mult by unwinding the sequence by hand on CPUs with
24011 slow multiply. */
24012 static rtx
24013 promote_duplicated_reg (enum machine_mode mode, rtx val)
24015 enum machine_mode valmode = GET_MODE (val);
24016 rtx tmp;
24017 int nops = mode == DImode ? 3 : 2;
24019 gcc_assert (mode == SImode || mode == DImode || val == const0_rtx);
24020 if (val == const0_rtx)
24021 return copy_to_mode_reg (mode, CONST0_RTX (mode));
24022 if (CONST_INT_P (val))
24024 HOST_WIDE_INT v = INTVAL (val) & 255;
24026 v |= v << 8;
24027 v |= v << 16;
24028 if (mode == DImode)
24029 v |= (v << 16) << 16;
24030 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
24033 if (valmode == VOIDmode)
24034 valmode = QImode;
24035 if (valmode != QImode)
24036 val = gen_lowpart (QImode, val);
24037 if (mode == QImode)
24038 return val;
24039 if (!TARGET_PARTIAL_REG_STALL)
24040 nops--;
24041 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
24042 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
24043 <= (ix86_cost->shift_const + ix86_cost->add) * nops
24044 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
24046 rtx reg = convert_modes (mode, QImode, val, true);
24047 tmp = promote_duplicated_reg (mode, const1_rtx);
24048 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
24049 OPTAB_DIRECT);
24051 else
24053 rtx reg = convert_modes (mode, QImode, val, true);
24055 if (!TARGET_PARTIAL_REG_STALL)
24056 if (mode == SImode)
24057 emit_insn (gen_movsi_insv_1 (reg, reg));
24058 else
24059 emit_insn (gen_movdi_insv_1 (reg, reg));
24060 else
24062 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
24063 NULL, 1, OPTAB_DIRECT);
24064 reg =
24065 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24067 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
24068 NULL, 1, OPTAB_DIRECT);
24069 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24070 if (mode == SImode)
24071 return reg;
24072 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
24073 NULL, 1, OPTAB_DIRECT);
24074 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24075 return reg;
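/* [Editorial sketch, not part of GCC] The constant path above in plain C:
   replicate the low byte of VAL across a 32- or 64-bit word, which is what
   multiplying by 0x01010101 (or 0x0101010101010101) achieves.  */
static unsigned long long
replicate_byte_sketch (unsigned long long val, int sixty_four_bit)
{
  unsigned long long v = val & 255;
  v |= v << 8;
  v |= v << 16;
  if (sixty_four_bit)
    v |= v << 32;
  return v;
}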
24079 /* Duplicate value VAL using promote_duplicated_reg into the maximal size that
24080 will be needed by the main loop copying SIZE_NEEDED chunks and by the prologue
24081 getting alignment from ALIGN to DESIRED_ALIGN. */
24082 static rtx
24083 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
24084 int align)
24086 rtx promoted_val;
24088 if (TARGET_64BIT
24089 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
24090 promoted_val = promote_duplicated_reg (DImode, val);
24091 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
24092 promoted_val = promote_duplicated_reg (SImode, val);
24093 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
24094 promoted_val = promote_duplicated_reg (HImode, val);
24095 else
24096 promoted_val = val;
24098 return promoted_val;
24101 /* Expand string move (memcpy) or store (memset) operation. Use i386 string
24102 operations when profitable. The code depends upon architecture, block size
24103 and alignment, but always has one of the following overall structures:
24105 Aligned move sequence:
24107 1) Prologue guard: Conditional that jumps up to epilogues for small
24108 blocks that can be handled by epilogue alone. This is faster
24109 but also needed for correctness, since the prologue assumes the block
24110 is larger than the desired alignment.
24112 Optional dynamic check for size and libcall for large
24113 blocks is emitted here too, with -minline-stringops-dynamically.
24115 2) Prologue: copy first few bytes in order to get destination
24116 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
24117 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
24118 copied. We emit either a jump tree on power of two sized
24119 blocks, or a byte loop.
24121 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24122 with specified algorithm.
24124 4) Epilogue: code copying tail of the block that is too small to be
24125 handled by main body (or up to size guarded by prologue guard).
24127 Misaligned move sequence
24129 1) misaligned move prologue/epilogue containing:
24130 a) Prologue handling small memory blocks and jumping to done_label
24131 (skipped if blocks are known to be large enough)
24132 b) Single move copying the first DESIRED_ALIGN-ALIGN bytes if alignment is
24133 needed by single possibly misaligned move
24134 (skipped if alignment is not needed)
24135 c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves
24137 2) Zero size guard dispatching to done_label, if needed
24139 3) dispatch to library call, if needed,
24141 4) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24142 with specified algorithm. */
24143 bool
24144 ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
24145 rtx align_exp, rtx expected_align_exp,
24146 rtx expected_size_exp, rtx min_size_exp,
24147 rtx max_size_exp, rtx probable_max_size_exp,
24148 bool issetmem)
24150 rtx destreg;
24151 rtx srcreg = NULL;
24152 rtx label = NULL;
24153 rtx tmp;
24154 rtx jump_around_label = NULL;
24155 HOST_WIDE_INT align = 1;
24156 unsigned HOST_WIDE_INT count = 0;
24157 HOST_WIDE_INT expected_size = -1;
24158 int size_needed = 0, epilogue_size_needed;
24159 int desired_align = 0, align_bytes = 0;
24160 enum stringop_alg alg;
24161 rtx promoted_val = NULL;
24162 rtx vec_promoted_val = NULL;
24163 bool force_loopy_epilogue = false;
24164 int dynamic_check;
24165 bool need_zero_guard = false;
24166 bool noalign;
24167 enum machine_mode move_mode = VOIDmode;
24168 int unroll_factor = 1;
24169 /* TODO: Once value ranges are available, fill in proper data. */
24170 unsigned HOST_WIDE_INT min_size = 0;
24171 unsigned HOST_WIDE_INT max_size = -1;
24172 unsigned HOST_WIDE_INT probable_max_size = -1;
24173 bool misaligned_prologue_used = false;
24175 if (CONST_INT_P (align_exp))
24176 align = INTVAL (align_exp);
24177 /* i386 can do misaligned accesses at a reasonably small increase in cost. */
24178 if (CONST_INT_P (expected_align_exp)
24179 && INTVAL (expected_align_exp) > align)
24180 align = INTVAL (expected_align_exp);
24181 /* ALIGN is the minimum of destination and source alignment, but we care here
24182 just about destination alignment. */
24183 else if (!issetmem
24184 && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
24185 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
24187 if (CONST_INT_P (count_exp))
24189 min_size = max_size = probable_max_size = count = expected_size
24190 = INTVAL (count_exp);
24191 /* When COUNT is 0, there is nothing to do. */
24192 if (!count)
24193 return true;
24195 else
24197 if (min_size_exp)
24198 min_size = INTVAL (min_size_exp);
24199 if (max_size_exp)
24200 max_size = INTVAL (max_size_exp);
24201 if (probable_max_size_exp)
24202 probable_max_size = INTVAL (probable_max_size_exp);
24203 if (CONST_INT_P (expected_size_exp))
24204 expected_size = INTVAL (expected_size_exp);
24207 /* Make sure we don't need to care about overflow later on. */
24208 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
24209 return false;
24211 /* Step 0: Decide on preferred algorithm, desired alignment and
24212 size of chunks to be copied by main loop. */
24213 alg = decide_alg (count, expected_size, min_size, probable_max_size,
24214 issetmem,
24215 issetmem && val_exp == const0_rtx,
24216 &dynamic_check, &noalign);
24217 if (alg == libcall)
24218 return false;
24219 gcc_assert (alg != no_stringop);
24221 /* For now the vector version of memset is generated only for memory zeroing, as
24222 creating the promoted vector value is very cheap in this case. */
24223 if (issetmem && alg == vector_loop && val_exp != const0_rtx)
24224 alg = unrolled_loop;
24226 if (!count)
24227 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
24228 destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
24229 if (!issetmem)
24230 srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
24232 unroll_factor = 1;
24233 move_mode = word_mode;
24234 switch (alg)
24236 case libcall:
24237 case no_stringop:
24238 case last_alg:
24239 gcc_unreachable ();
24240 case loop_1_byte:
24241 need_zero_guard = true;
24242 move_mode = QImode;
24243 break;
24244 case loop:
24245 need_zero_guard = true;
24246 break;
24247 case unrolled_loop:
24248 need_zero_guard = true;
24249 unroll_factor = (TARGET_64BIT ? 4 : 2);
24250 break;
24251 case vector_loop:
24252 need_zero_guard = true;
24253 unroll_factor = 4;
24254 /* Find the widest supported mode. */
24255 move_mode = word_mode;
24256 while (optab_handler (mov_optab, GET_MODE_WIDER_MODE (move_mode))
24257 != CODE_FOR_nothing)
24258 move_mode = GET_MODE_WIDER_MODE (move_mode);
24260 /* Find the corresponding vector mode with the same size as MOVE_MODE.
24261 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
24262 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
24264 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
24265 move_mode = mode_for_vector (word_mode, nunits);
24266 if (optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
24267 move_mode = word_mode;
24269 gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
24270 break;
24271 case rep_prefix_8_byte:
24272 move_mode = DImode;
24273 break;
24274 case rep_prefix_4_byte:
24275 move_mode = SImode;
24276 break;
24277 case rep_prefix_1_byte:
24278 move_mode = QImode;
24279 break;
24281 size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
24282 epilogue_size_needed = size_needed;
24284 desired_align = decide_alignment (align, alg, expected_size, move_mode);
24285 if (!TARGET_ALIGN_STRINGOPS || noalign)
24286 align = desired_align;
24288 /* Step 1: Prologue guard. */
24290 /* Alignment code needs count to be in register. */
24291 if (CONST_INT_P (count_exp) && desired_align > align)
24293 if (INTVAL (count_exp) > desired_align
24294 && INTVAL (count_exp) > size_needed)
24296 align_bytes
24297 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
24298 if (align_bytes <= 0)
24299 align_bytes = 0;
24300 else
24301 align_bytes = desired_align - align_bytes;
24303 if (align_bytes == 0)
24304 count_exp = force_reg (counter_mode (count_exp), count_exp);
24306 gcc_assert (desired_align >= 1 && align >= 1);
24308 /* Misaligned move sequences handle both prologue and epilogue at once.
24309 Default code generation results in smaller code for large alignments
24310 and also avoids redundant work when sizes are known precisely. */
24311 misaligned_prologue_used
24312 = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
24313 && MAX (desired_align, epilogue_size_needed) <= 32
24314 && desired_align <= epilogue_size_needed
24315 && ((desired_align > align && !align_bytes)
24316 || (!count && epilogue_size_needed > 1)));
24318 /* Do the cheap promotion to allow better CSE across the
24319 main loop and epilogue (i.e. one load of the big constant in
24320 front of all the code).
24321 For now the misaligned move sequences do not have a fast path
24322 without broadcasting. */
24323 if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
24325 if (alg == vector_loop)
24327 gcc_assert (val_exp == const0_rtx);
24328 vec_promoted_val = promote_duplicated_reg (move_mode, val_exp);
24329 promoted_val = promote_duplicated_reg_to_size (val_exp,
24330 GET_MODE_SIZE (word_mode),
24331 desired_align, align);
24333 else
24335 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24336 desired_align, align);
24339 /* Misaligned move sequences handle both prologues and epilogues at once.
24340 Default code generation results in smaller code for large alignments and
24341 also avoids redundant work when sizes are known precisely. */
24342 if (misaligned_prologue_used)
24344 /* The misaligned move prologue handles small blocks by itself. */
24345 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
24346 (dst, src, &destreg, &srcreg,
24347 move_mode, promoted_val, vec_promoted_val,
24348 &count_exp,
24349 &jump_around_label,
24350 desired_align < align
24351 ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed,
24352 desired_align, align, &min_size, dynamic_check, issetmem);
24353 if (!issetmem)
24354 src = change_address (src, BLKmode, srcreg);
24355 dst = change_address (dst, BLKmode, destreg);
24356 set_mem_align (dst, desired_align * BITS_PER_UNIT);
24357 epilogue_size_needed = 0;
24358 if (need_zero_guard && !min_size)
24360 /* It is possible that we copied enough so the main loop will not
24361 execute. */
24362 gcc_assert (size_needed > 1);
24363 if (jump_around_label == NULL_RTX)
24364 jump_around_label = gen_label_rtx ();
24365 emit_cmp_and_jump_insns (count_exp,
24366 GEN_INT (size_needed),
24367 LTU, 0, counter_mode (count_exp), 1, jump_around_label);
24368 if (expected_size == -1
24369 || expected_size < (desired_align - align) / 2 + size_needed)
24370 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24371 else
24372 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24375 /* Ensure that alignment prologue won't copy past end of block. */
24376 else if (size_needed > 1 || (desired_align > 1 && desired_align > align))
24378 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
24379 /* The epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
24380 Make sure it is a power of 2. */
24381 epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);
24383 /* To improve performance for small blocks, we jump around the VAL
24384 promoting code. This means that if the promoted VAL is not constant,
24385 we might not use it in the epilogue and have to use the byte
24386 loop variant. */
24387 if (issetmem && epilogue_size_needed > 2 && !promoted_val)
24388 force_loopy_epilogue = true;
24389 if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24390 || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24392 /* If main algorithm works on QImode, no epilogue is needed.
24393 For small sizes just don't align anything. */
24394 if (size_needed == 1)
24395 desired_align = align;
24396 else
24397 goto epilogue;
24399 else if (!count
24400 && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24402 label = gen_label_rtx ();
24403 emit_cmp_and_jump_insns (count_exp,
24404 GEN_INT (epilogue_size_needed),
24405 LTU, 0, counter_mode (count_exp), 1, label);
24406 if (expected_size == -1 || expected_size < epilogue_size_needed)
24407 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24408 else
24409 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24413 /* Emit code to decide at runtime whether a library call or inline code should
24414 be used. */
24415 if (dynamic_check != -1)
24417 if (!issetmem && CONST_INT_P (count_exp))
24419 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
24421 emit_block_move_via_libcall (dst, src, count_exp, false);
24422 count_exp = const0_rtx;
24423 goto epilogue;
24426 else
24428 rtx hot_label = gen_label_rtx ();
24429 if (jump_around_label == NULL_RTX)
24430 jump_around_label = gen_label_rtx ();
24431 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
24432 LEU, 0, counter_mode (count_exp),
24433 1, hot_label);
24434 predict_jump (REG_BR_PROB_BASE * 90 / 100);
24435 if (issetmem)
24436 set_storage_via_libcall (dst, count_exp, val_exp, false);
24437 else
24438 emit_block_move_via_libcall (dst, src, count_exp, false);
24439 emit_jump (jump_around_label);
24440 emit_label (hot_label);
24444 /* Step 2: Alignment prologue. */
24445 /* Do the expensive promotion once we branched off the small blocks. */
24446 if (issetmem && !promoted_val)
24447 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24448 desired_align, align);
24450 if (desired_align > align && !misaligned_prologue_used)
24452 if (align_bytes == 0)
24454 /* Except for the first move in the prologue, we no longer know
24455 the constant offset in the aliasing info. It does not seem worth
24456 the pain to maintain it for the first move, so throw away
24457 the info early. */
24458 dst = change_address (dst, BLKmode, destreg);
24459 if (!issetmem)
24460 src = change_address (src, BLKmode, srcreg);
24461 dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg,
24462 promoted_val, vec_promoted_val,
24463 count_exp, align, desired_align,
24464 issetmem);
24465 /* At most desired_align - align bytes are copied. */
24466 if (min_size < (unsigned)(desired_align - align))
24467 min_size = 0;
24468 else
24469 min_size -= desired_align - align;
24471 else
24473 /* If we know how many bytes need to be stored before dst is
24474 sufficiently aligned, maintain aliasing info accurately. */
24475 dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg,
24476 srcreg,
24477 promoted_val,
24478 vec_promoted_val,
24479 desired_align,
24480 align_bytes,
24481 issetmem);
24483 count_exp = plus_constant (counter_mode (count_exp),
24484 count_exp, -align_bytes);
24485 count -= align_bytes;
24486 min_size -= align_bytes;
24487 max_size -= align_bytes;
24489 if (need_zero_guard
24490 && !min_size
24491 && (count < (unsigned HOST_WIDE_INT) size_needed
24492 || (align_bytes == 0
24493 && count < ((unsigned HOST_WIDE_INT) size_needed
24494 + desired_align - align))))
24496 /* It is possible that we copied enough that the main loop will not
24497 execute. */
24498 gcc_assert (size_needed > 1);
24499 if (label == NULL_RTX)
24500 label = gen_label_rtx ();
24501 emit_cmp_and_jump_insns (count_exp,
24502 GEN_INT (size_needed),
24503 LTU, 0, counter_mode (count_exp), 1, label);
24504 if (expected_size == -1
24505 || expected_size < (desired_align - align) / 2 + size_needed)
24506 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24507 else
24508 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24511 if (label && size_needed == 1)
24513 emit_label (label);
24514 LABEL_NUSES (label) = 1;
24515 label = NULL;
24516 epilogue_size_needed = 1;
24517 if (issetmem)
24518 promoted_val = val_exp;
24520 else if (label == NULL_RTX && !misaligned_prologue_used)
24521 epilogue_size_needed = size_needed;
24523 /* Step 3: Main loop. */
24525 switch (alg)
24527 case libcall:
24528 case no_stringop:
24529 case last_alg:
24530 gcc_unreachable ();
24531 case loop_1_byte:
24532 case loop:
24533 case unrolled_loop:
24534 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val,
24535 count_exp, move_mode, unroll_factor,
24536 expected_size, issetmem);
24537 break;
24538 case vector_loop:
24539 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg,
24540 vec_promoted_val, count_exp, move_mode,
24541 unroll_factor, expected_size, issetmem);
24542 break;
24543 case rep_prefix_8_byte:
24544 case rep_prefix_4_byte:
24545 case rep_prefix_1_byte:
24546 expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val,
24547 val_exp, count_exp, move_mode, issetmem);
24548 break;
24550 /* Properly adjust the offsets of the src and dest memory for aliasing. */
24551 if (CONST_INT_P (count_exp))
24553 if (!issetmem)
24554 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
24555 (count / size_needed) * size_needed);
24556 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
24557 (count / size_needed) * size_needed);
24559 else
24561 if (!issetmem)
24562 src = change_address (src, BLKmode, srcreg);
24563 dst = change_address (dst, BLKmode, destreg);
24566 /* Step 4: Epilogue to copy the remaining bytes. */
24567 epilogue:
24568 if (label)
24570 /* When the main loop is done, COUNT_EXP might hold the original count,
24571 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
24572 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
24573 bytes. Compensate if needed. */
24575 if (size_needed < epilogue_size_needed)
24577 tmp =
24578 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
24579 GEN_INT (size_needed - 1), count_exp, 1,
24580 OPTAB_DIRECT);
24581 if (tmp != count_exp)
24582 emit_move_insn (count_exp, tmp);
24584 emit_label (label);
24585 LABEL_NUSES (label) = 1;
24588 if (count_exp != const0_rtx && epilogue_size_needed > 1)
24590 if (force_loopy_epilogue)
24591 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
24592 epilogue_size_needed);
24593 else
24595 if (issetmem)
24596 expand_setmem_epilogue (dst, destreg, promoted_val,
24597 vec_promoted_val, count_exp,
24598 epilogue_size_needed);
24599 else
24600 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
24601 epilogue_size_needed);
24604 if (jump_around_label)
24605 emit_label (jump_around_label);
24606 return true;
24610 /* Expand the appropriate insns for doing strlen if not just doing
24611 repnz; scasb
24613 out = result, initialized with the start address
24614 align_rtx = alignment of the address.
24615 scratch = scratch register, initialized with the start address when
24616 not aligned, otherwise undefined
24618 This is just the body. It needs the initializations mentioned above and
24619 some address computation at the end. These things are done in i386.md. */
24621 static void
24622 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
24624 int align;
24625 rtx tmp;
24626 rtx align_2_label = NULL_RTX;
24627 rtx align_3_label = NULL_RTX;
24628 rtx align_4_label = gen_label_rtx ();
24629 rtx end_0_label = gen_label_rtx ();
24630 rtx mem;
24631 rtx tmpreg = gen_reg_rtx (SImode);
24632 rtx scratch = gen_reg_rtx (SImode);
24633 rtx cmp;
24635 align = 0;
24636 if (CONST_INT_P (align_rtx))
24637 align = INTVAL (align_rtx);
24639 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
24641 /* Is there a known alignment and is it less than 4? */
24642 if (align < 4)
24644 rtx scratch1 = gen_reg_rtx (Pmode);
24645 emit_move_insn (scratch1, out);
24646 /* Is there a known alignment and is it not 2? */
24647 if (align != 2)
24649 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte boundary */
24650 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte boundary */
24652 /* Leave just the 3 lower bits. */
24653 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
24654 NULL_RTX, 0, OPTAB_WIDEN);
24656 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
24657 Pmode, 1, align_4_label);
24658 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
24659 Pmode, 1, align_2_label);
24660 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
24661 Pmode, 1, align_3_label);
24663 else
24665 /* Since the alignment is 2, we have to check 2 or 0 bytes;
24666 check whether it is aligned to a 4-byte boundary. */
24668 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
24669 NULL_RTX, 0, OPTAB_WIDEN);
24671 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
24672 Pmode, 1, align_4_label);
24675 mem = change_address (src, QImode, out);
24677 /* Now compare the bytes. */
24679 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
24680 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
24681 QImode, 1, end_0_label);
24683 /* Increment the address. */
24684 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24686 /* Not needed with an alignment of 2 */
24687 if (align != 2)
24689 emit_label (align_2_label);
24691 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
24692 end_0_label);
24694 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24696 emit_label (align_3_label);
24699 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
24700 end_0_label);
24702 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24705 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
24706 align this loop; doing so only makes the program larger and does not
24707 speed it up. */
24708 emit_label (align_4_label);
24710 mem = change_address (src, SImode, out);
24711 emit_move_insn (scratch, mem);
24712 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
24714 /* This formula yields a nonzero result iff one of the bytes is zero.
24715 This saves three branches inside the loop and many cycles. */
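/* For example, scratch == 0x11220033 (which contains a zero byte) gives
(0x11220033 - 0x01010101) & ~0x11220033 & 0x80808080 == 0x00008000,
which is nonzero, while a word with no zero byte such as 0x01010101
gives 0. */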
24717 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
24718 emit_insn (gen_one_cmplsi2 (scratch, scratch));
24719 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
24720 emit_insn (gen_andsi3 (tmpreg, tmpreg,
24721 gen_int_mode (0x80808080, SImode)));
24722 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
24723 align_4_label);
24725 if (TARGET_CMOVE)
24727 rtx reg = gen_reg_rtx (SImode);
24728 rtx reg2 = gen_reg_rtx (Pmode);
24729 emit_move_insn (reg, tmpreg);
24730 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
24732 /* If zero is not in the first two bytes, move two bytes forward. */
24733 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
24734 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24735 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
24736 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
24737 gen_rtx_IF_THEN_ELSE (SImode, tmp,
24738 reg,
24739 tmpreg)));
24740 /* Emit lea manually to avoid clobbering of flags. */
24741 emit_insn (gen_rtx_SET (SImode, reg2,
24742 gen_rtx_PLUS (Pmode, out, const2_rtx)));
24744 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24745 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
24746 emit_insn (gen_rtx_SET (VOIDmode, out,
24747 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
24748 reg2,
24749 out)));
24751 else
24753 rtx end_2_label = gen_label_rtx ();
24754 /* Is zero in the first two bytes? */
24756 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
24757 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24758 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
24759 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
24760 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
24761 pc_rtx);
24762 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
24763 JUMP_LABEL (tmp) = end_2_label;
24765 /* Not in the first two. Move two bytes forward. */
24766 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
24767 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
24769 emit_label (end_2_label);
24773 /* Avoid branch in fixing the byte. */
24774 tmpreg = gen_lowpart (QImode, tmpreg);
24775 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
24776 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
24777 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
24778 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
24780 emit_label (end_0_label);
24783 /* Expand strlen. */
24785 bool
24786 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
24788 rtx addr, scratch1, scratch2, scratch3, scratch4;
24790 /* The generic case of the strlen expander is long. Avoid its
24791 expansion unless TARGET_INLINE_ALL_STRINGOPS. */
24793 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
24794 && !TARGET_INLINE_ALL_STRINGOPS
24795 && !optimize_insn_for_size_p ()
24796 && (!CONST_INT_P (align) || INTVAL (align) < 4))
24797 return false;
24799 addr = force_reg (Pmode, XEXP (src, 0));
24800 scratch1 = gen_reg_rtx (Pmode);
24802 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
24803 && !optimize_insn_for_size_p ())
24805 /* Well it seems that some optimizer does not combine a call like
24806 foo(strlen(bar), strlen(bar));
24807 when the move and the subtraction are done here. It does calculate
24808 the length just once when these instructions are done inside of
24809 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
24810 often used and I use one fewer register for the lifetime of
24811 output_strlen_unroll() this is better. */
24813 emit_move_insn (out, addr);
24815 ix86_expand_strlensi_unroll_1 (out, src, align);
24817 /* strlensi_unroll_1 returns the address of the zero at the end of
24818 the string, like memchr(), so compute the length by subtracting
24819 the start address. */
24820 emit_insn (ix86_gen_sub3 (out, out, addr));
24822 else
24824 rtx unspec;
24826 /* Can't use this if the user has appropriated eax, ecx, or edi. */
24827 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
24828 return false;
24830 scratch2 = gen_reg_rtx (Pmode);
24831 scratch3 = gen_reg_rtx (Pmode);
24832 scratch4 = force_reg (Pmode, constm1_rtx);
24834 emit_move_insn (scratch3, addr);
24835 eoschar = force_reg (QImode, eoschar);
24837 src = replace_equiv_address_nv (src, scratch3);
24839 /* If .md starts supporting :P, this can be done in .md. */
24840 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
24841 scratch4), UNSPEC_SCAS);
24842 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
24843 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
24844 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
24846 return true;
24849 /* For a given symbol (function), construct code to compute the address of its
24850 PLT entry in the large x86-64 PIC model. */
24851 static rtx
24852 construct_plt_address (rtx symbol)
24854 rtx tmp, unspec;
24856 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
24857 gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
24858 gcc_assert (Pmode == DImode);
24860 tmp = gen_reg_rtx (Pmode);
24861 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
24863 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
24864 emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
24865 return tmp;
24869 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
24870 rtx callarg2,
24871 rtx pop, bool sibcall)
24873 unsigned int const cregs_size
24874 = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers);
24875 rtx vec[3 + cregs_size];
24876 rtx use = NULL, call;
24877 unsigned int vec_len = 0;
24879 if (pop == const0_rtx)
24880 pop = NULL;
24881 gcc_assert (!TARGET_64BIT || !pop);
24883 if (TARGET_MACHO && !TARGET_64BIT)
24885 #if TARGET_MACHO
24886 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
24887 fnaddr = machopic_indirect_call_target (fnaddr);
24888 #endif
24890 else
24892 /* Static functions and indirect calls don't need the pic register. */
24893 if (flag_pic
24894 && (!TARGET_64BIT
24895 || (ix86_cmodel == CM_LARGE_PIC
24896 && DEFAULT_ABI != MS_ABI))
24897 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
24898 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
24899 use_reg (&use, pic_offset_table_rtx);
24902 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
24904 rtx al = gen_rtx_REG (QImode, AX_REG);
24905 emit_move_insn (al, callarg2);
24906 use_reg (&use, al);
24909 if (ix86_cmodel == CM_LARGE_PIC
24910 && !TARGET_PECOFF
24911 && MEM_P (fnaddr)
24912 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
24913 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
24914 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
24915 else if (sibcall
24916 ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
24917 : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
24919 fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
24920 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
24923 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
24924 if (retval)
24925 call = gen_rtx_SET (VOIDmode, retval, call);
24926 vec[vec_len++] = call;
24928 if (pop)
24930 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
24931 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
24932 vec[vec_len++] = pop;
24935 if (TARGET_64BIT_MS_ABI
24936 && (!callarg2 || INTVAL (callarg2) != -2))
24938 unsigned i;
24940 vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
24941 UNSPEC_MS_TO_SYSV_CALL);
24943 for (i = 0; i < cregs_size; i++)
24945 int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
24946 enum machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;
24948 vec[vec_len++]
24949 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (mode, regno));
24953 if (vec_len > 1)
24954 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
24955 call = emit_call_insn (call);
24956 if (use)
24957 CALL_INSN_FUNCTION_USAGE (call) = use;
24959 return call;
24962 /* Output the assembly for a call instruction. */
24964 const char *
24965 ix86_output_call_insn (rtx insn, rtx call_op)
24967 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
24968 bool seh_nop_p = false;
24969 const char *xasm;
24971 if (SIBLING_CALL_P (insn))
24973 if (direct_p)
24974 xasm = "jmp\t%P0";
24975 /* SEH epilogue detection requires the indirect branch case
24976 to include REX.W. */
24977 else if (TARGET_SEH)
24978 xasm = "rex.W jmp %A0";
24979 else
24980 xasm = "jmp\t%A0";
24982 output_asm_insn (xasm, &call_op);
24983 return "";
24986 /* SEH unwinding can require an extra nop to be emitted in several
24987 circumstances. Determine if we have one of those. */
24988 if (TARGET_SEH)
24990 rtx i;
24992 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
24994 /* If we get to another real insn, we don't need the nop. */
24995 if (INSN_P (i))
24996 break;
24998 /* If we get to the epilogue note, prevent a catch region from
24999 being adjacent to the standard epilogue sequence. With non-call
25000 exceptions, we'll have done this during epilogue emission. */
25001 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
25002 && !flag_non_call_exceptions
25003 && !can_throw_internal (insn))
25005 seh_nop_p = true;
25006 break;
25010 /* If we didn't find a real insn following the call, prevent the
25011 unwinder from looking into the next function. */
25012 if (i == NULL)
25013 seh_nop_p = true;
25016 if (direct_p)
25017 xasm = "call\t%P0";
25018 else
25019 xasm = "call\t%A0";
25021 output_asm_insn (xasm, &call_op);
25023 if (seh_nop_p)
25024 return "nop";
25026 return "";
25029 /* Clear stack slot assignments remembered from previous functions.
25030 This is called from INIT_EXPANDERS once before RTL is emitted for each
25031 function. */
25033 static struct machine_function *
25034 ix86_init_machine_status (void)
25036 struct machine_function *f;
25038 f = ggc_cleared_alloc<machine_function> ();
25039 f->use_fast_prologue_epilogue_nregs = -1;
25040 f->call_abi = ix86_abi;
25042 return f;
25045 /* Return a MEM corresponding to a stack slot with mode MODE.
25046 Allocate a new slot if necessary.
25048 The RTL for a function can have several slots available: N is
25049 which slot to use. */
25052 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
25054 struct stack_local_entry *s;
25056 gcc_assert (n < MAX_386_STACK_LOCALS);
25058 for (s = ix86_stack_locals; s; s = s->next)
25059 if (s->mode == mode && s->n == n)
25060 return validize_mem (copy_rtx (s->rtl));
25062 s = ggc_alloc<stack_local_entry> ();
25063 s->n = n;
25064 s->mode = mode;
25065 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
25067 s->next = ix86_stack_locals;
25068 ix86_stack_locals = s;
25069 return validize_mem (s->rtl);
25072 static void
25073 ix86_instantiate_decls (void)
25075 struct stack_local_entry *s;
25077 for (s = ix86_stack_locals; s; s = s->next)
25078 if (s->rtl != NULL_RTX)
25079 instantiate_decl_rtl (s->rtl);
25082 /* Check whether x86 address PARTS is a pc-relative address. */
25084 static bool
25085 rip_relative_addr_p (struct ix86_address *parts)
25087 rtx base, index, disp;
25089 base = parts->base;
25090 index = parts->index;
25091 disp = parts->disp;
25093 if (disp && !base && !index)
25095 if (TARGET_64BIT)
25097 rtx symbol = disp;
25099 if (GET_CODE (disp) == CONST)
25100 symbol = XEXP (disp, 0);
25101 if (GET_CODE (symbol) == PLUS
25102 && CONST_INT_P (XEXP (symbol, 1)))
25103 symbol = XEXP (symbol, 0);
25105 if (GET_CODE (symbol) == LABEL_REF
25106 || (GET_CODE (symbol) == SYMBOL_REF
25107 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
25108 || (GET_CODE (symbol) == UNSPEC
25109 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
25110 || XINT (symbol, 1) == UNSPEC_PCREL
25111 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
25112 return true;
25115 return false;
25118 /* Calculate the length of the memory address in the instruction encoding.
25119 Includes the addr32 prefix; does not include the one-byte modrm, opcode,
25120 or other prefixes. We never generate an addr32 prefix for the LEA insn. */
25123 memory_address_length (rtx addr, bool lea)
25125 struct ix86_address parts;
25126 rtx base, index, disp;
25127 int len;
25128 int ok;
25130 if (GET_CODE (addr) == PRE_DEC
25131 || GET_CODE (addr) == POST_INC
25132 || GET_CODE (addr) == PRE_MODIFY
25133 || GET_CODE (addr) == POST_MODIFY)
25134 return 0;
25136 ok = ix86_decompose_address (addr, &parts);
25137 gcc_assert (ok);
25139 len = (parts.seg == SEG_DEFAULT) ? 0 : 1;
25141 /* If this is not an LEA instruction, add the length of the addr32 prefix. */
25142 if (TARGET_64BIT && !lea
25143 && (SImode_address_operand (addr, VOIDmode)
25144 || (parts.base && GET_MODE (parts.base) == SImode)
25145 || (parts.index && GET_MODE (parts.index) == SImode)))
25146 len++;
25148 base = parts.base;
25149 index = parts.index;
25150 disp = parts.disp;
25152 if (base && GET_CODE (base) == SUBREG)
25153 base = SUBREG_REG (base);
25154 if (index && GET_CODE (index) == SUBREG)
25155 index = SUBREG_REG (index);
25157 gcc_assert (base == NULL_RTX || REG_P (base));
25158 gcc_assert (index == NULL_RTX || REG_P (index));
25160 /* Rule of thumb:
25161 - esp as the base always wants an index,
25162 - ebp as the base always wants a displacement,
25163 - r12 as the base always wants an index,
25164 - r13 as the base always wants a displacement. */
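/* For example, movl (%esp), %eax is encoded as 8b 04 24 (an extra SIB
byte) and movl (%ebp), %eax as 8b 45 00 (an extra zero displacement
byte), which is what the length computation below accounts for. */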
25166 /* Register Indirect. */
25167 if (base && !index && !disp)
25169 /* esp (for its index) and ebp (for its displacement) need
25170 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
25171 code. */
25172 if (base == arg_pointer_rtx
25173 || base == frame_pointer_rtx
25174 || REGNO (base) == SP_REG
25175 || REGNO (base) == BP_REG
25176 || REGNO (base) == R12_REG
25177 || REGNO (base) == R13_REG)
25178 len++;
25181 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
25182 is not disp32, but disp32(%rip), so for disp32
25183 a SIB byte is needed, unless print_operand_address
25184 optimizes it into disp32(%rip) or (%rip) is implied
25185 by UNSPEC. */
25186 else if (disp && !base && !index)
25188 len += 4;
25189 if (rip_relative_addr_p (&parts))
25190 len++;
25192 else
25194 /* Find the length of the displacement constant. */
25195 if (disp)
25197 if (base && satisfies_constraint_K (disp))
25198 len += 1;
25199 else
25200 len += 4;
25202 /* ebp always wants a displacement. Similarly r13. */
25203 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
25204 len++;
25206 /* An index requires the two-byte modrm form.... */
25207 if (index
25208 /* ...like esp (or r12), which always wants an index. */
25209 || base == arg_pointer_rtx
25210 || base == frame_pointer_rtx
25211 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
25212 len++;
25215 return len;
25218 /* Compute default value for "length_immediate" attribute. When SHORTFORM
25219 is set, expect that the insn has an 8-bit immediate alternative. */
25221 ix86_attr_length_immediate_default (rtx insn, bool shortform)
25223 int len = 0;
25224 int i;
25225 extract_insn_cached (insn);
25226 for (i = recog_data.n_operands - 1; i >= 0; --i)
25227 if (CONSTANT_P (recog_data.operand[i]))
25229 enum attr_mode mode = get_attr_mode (insn);
25231 gcc_assert (!len);
25232 if (shortform && CONST_INT_P (recog_data.operand[i]))
25234 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
25235 switch (mode)
25237 case MODE_QI:
25238 len = 1;
25239 continue;
25240 case MODE_HI:
25241 ival = trunc_int_for_mode (ival, HImode);
25242 break;
25243 case MODE_SI:
25244 ival = trunc_int_for_mode (ival, SImode);
25245 break;
25246 default:
25247 break;
25249 if (IN_RANGE (ival, -128, 127))
25251 len = 1;
25252 continue;
25255 switch (mode)
25257 case MODE_QI:
25258 len = 1;
25259 break;
25260 case MODE_HI:
25261 len = 2;
25262 break;
25263 case MODE_SI:
25264 len = 4;
25265 break;
25266 /* Immediates for DImode instructions are encoded
25267 as 32-bit sign-extended values. */
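/* E.g. addq $0x12345678, %rax still uses a 4-byte immediate; a constant
outside the signed 32-bit range cannot be an immediate of a DImode add
and must be loaded into a register first. */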
25268 case MODE_DI:
25269 len = 4;
25270 break;
25271 default:
25272 fatal_insn ("unknown insn mode", insn);
25275 return len;
25278 /* Compute default value for "length_address" attribute. */
25280 ix86_attr_length_address_default (rtx insn)
25282 int i;
25284 if (get_attr_type (insn) == TYPE_LEA)
25286 rtx set = PATTERN (insn), addr;
25288 if (GET_CODE (set) == PARALLEL)
25289 set = XVECEXP (set, 0, 0);
25291 gcc_assert (GET_CODE (set) == SET);
25293 addr = SET_SRC (set);
25295 return memory_address_length (addr, true);
25298 extract_insn_cached (insn);
25299 for (i = recog_data.n_operands - 1; i >= 0; --i)
25300 if (MEM_P (recog_data.operand[i]))
25302 constrain_operands_cached (reload_completed);
25303 if (which_alternative != -1)
25305 const char *constraints = recog_data.constraints[i];
25306 int alt = which_alternative;
25308 while (*constraints == '=' || *constraints == '+')
25309 constraints++;
25310 while (alt-- > 0)
25311 while (*constraints++ != ',')
25313 /* Skip ignored operands. */
25314 if (*constraints == 'X')
25315 continue;
25317 return memory_address_length (XEXP (recog_data.operand[i], 0), false);
25319 return 0;
25322 /* Compute default value for "length_vex" attribute. It includes
25323 the 2- or 3-byte VEX prefix and 1 opcode byte. */
25326 ix86_attr_length_vex_default (rtx insn, bool has_0f_opcode, bool has_vex_w)
25328 int i;
25330 /* Only the 0f opcode map can use the 2-byte VEX prefix, and the VEX.W
25331 bit requires the 3-byte VEX prefix. */
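/* E.g. vaddps %xmm1, %xmm2, %xmm3 can use the 2-byte (C5) prefix, whereas
an insn from the 0F 3A opcode map or one that needs VEX.W, such as
vpermq, must use the 3-byte (C4) form. */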
25332 if (!has_0f_opcode || has_vex_w)
25333 return 3 + 1;
25335 /* We can always use the 2-byte VEX prefix in 32-bit mode. */
25336 if (!TARGET_64BIT)
25337 return 2 + 1;
25339 extract_insn_cached (insn);
25341 for (i = recog_data.n_operands - 1; i >= 0; --i)
25342 if (REG_P (recog_data.operand[i]))
25344 /* REX.W bit uses 3 byte VEX prefix. */
25345 if (GET_MODE (recog_data.operand[i]) == DImode
25346 && GENERAL_REG_P (recog_data.operand[i]))
25347 return 3 + 1;
25349 else
25351 /* REX.X or REX.B bits use 3 byte VEX prefix. */
25352 if (MEM_P (recog_data.operand[i])
25353 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
25354 return 3 + 1;
25357 return 2 + 1;
25360 /* Return the maximum number of instructions a CPU can issue. */
25362 static int
25363 ix86_issue_rate (void)
25365 switch (ix86_tune)
25367 case PROCESSOR_PENTIUM:
25368 case PROCESSOR_BONNELL:
25369 case PROCESSOR_SILVERMONT:
25370 case PROCESSOR_INTEL:
25371 case PROCESSOR_K6:
25372 case PROCESSOR_BTVER2:
25373 case PROCESSOR_PENTIUM4:
25374 case PROCESSOR_NOCONA:
25375 return 2;
25377 case PROCESSOR_PENTIUMPRO:
25378 case PROCESSOR_ATHLON:
25379 case PROCESSOR_K8:
25380 case PROCESSOR_AMDFAM10:
25381 case PROCESSOR_GENERIC:
25382 case PROCESSOR_BTVER1:
25383 return 3;
25385 case PROCESSOR_BDVER1:
25386 case PROCESSOR_BDVER2:
25387 case PROCESSOR_BDVER3:
25388 case PROCESSOR_BDVER4:
25389 case PROCESSOR_CORE2:
25390 case PROCESSOR_NEHALEM:
25391 case PROCESSOR_SANDYBRIDGE:
25392 case PROCESSOR_HASWELL:
25393 return 4;
25395 default:
25396 return 1;
25400 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
25401 by DEP_INSN and nothing else set by DEP_INSN. */
25403 static bool
25404 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
25406 rtx set, set2;
25408 /* Simplify the test for uninteresting insns. */
25409 if (insn_type != TYPE_SETCC
25410 && insn_type != TYPE_ICMOV
25411 && insn_type != TYPE_FCMOV
25412 && insn_type != TYPE_IBR)
25413 return false;
25415 if ((set = single_set (dep_insn)) != 0)
25417 set = SET_DEST (set);
25418 set2 = NULL_RTX;
25420 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
25421 && XVECLEN (PATTERN (dep_insn), 0) == 2
25422 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
25423 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
25425 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
25426 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
25428 else
25429 return false;
25431 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
25432 return false;
25434 /* This test is true if the dependent insn reads the flags but
25435 not any other potentially set register. */
25436 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
25437 return false;
25439 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
25440 return false;
25442 return true;
25445 /* Return true iff USE_INSN has a memory address with operands set by
25446 SET_INSN. */
25448 bool
25449 ix86_agi_dependent (rtx set_insn, rtx use_insn)
25451 int i;
25452 extract_insn_cached (use_insn);
25453 for (i = recog_data.n_operands - 1; i >= 0; --i)
25454 if (MEM_P (recog_data.operand[i]))
25456 rtx addr = XEXP (recog_data.operand[i], 0);
25457 return modified_in_p (addr, set_insn) != 0;
25459 return false;
25462 /* Helper function for exact_store_load_dependency.
25463 Return true if addr is found in insn. */
25464 static bool
25465 exact_dependency_1 (rtx addr, rtx insn)
25467 enum rtx_code code;
25468 const char *format_ptr;
25469 int i, j;
25471 code = GET_CODE (insn);
25472 switch (code)
25474 case MEM:
25475 if (rtx_equal_p (addr, insn))
25476 return true;
25477 break;
25478 case REG:
25479 CASE_CONST_ANY:
25480 case SYMBOL_REF:
25481 case CODE_LABEL:
25482 case PC:
25483 case CC0:
25484 case EXPR_LIST:
25485 return false;
25486 default:
25487 break;
25490 format_ptr = GET_RTX_FORMAT (code);
25491 for (i = 0; i < GET_RTX_LENGTH (code); i++)
25493 switch (*format_ptr++)
25495 case 'e':
25496 if (exact_dependency_1 (addr, XEXP (insn, i)))
25497 return true;
25498 break;
25499 case 'E':
25500 for (j = 0; j < XVECLEN (insn, i); j++)
25501 if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
25502 return true;
25503 break;
25506 return false;
25509 /* Return true if there exists an exact dependency between the store and the
25510 load, i.e. the same memory address is used in both. */
25511 static bool
25512 exact_store_load_dependency (rtx store, rtx load)
25514 rtx set1, set2;
25516 set1 = single_set (store);
25517 if (!set1)
25518 return false;
25519 if (!MEM_P (SET_DEST (set1)))
25520 return false;
25521 set2 = single_set (load);
25522 if (!set2)
25523 return false;
25524 if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
25525 return true;
25526 return false;
25529 static int
25530 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
25532 enum attr_type insn_type, dep_insn_type;
25533 enum attr_memory memory;
25534 rtx set, set2;
25535 int dep_insn_code_number;
25537 /* Anti and output dependencies have zero cost on all CPUs. */
25538 if (REG_NOTE_KIND (link) != 0)
25539 return 0;
25541 dep_insn_code_number = recog_memoized (dep_insn);
25543 /* If we can't recognize the insns, we can't really do anything. */
25544 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
25545 return cost;
25547 insn_type = get_attr_type (insn);
25548 dep_insn_type = get_attr_type (dep_insn);
25550 switch (ix86_tune)
25552 case PROCESSOR_PENTIUM:
25553 /* Address Generation Interlock adds a cycle of latency. */
25554 if (insn_type == TYPE_LEA)
25556 rtx addr = PATTERN (insn);
25558 if (GET_CODE (addr) == PARALLEL)
25559 addr = XVECEXP (addr, 0, 0);
25561 gcc_assert (GET_CODE (addr) == SET);
25563 addr = SET_SRC (addr);
25564 if (modified_in_p (addr, dep_insn))
25565 cost += 1;
25567 else if (ix86_agi_dependent (dep_insn, insn))
25568 cost += 1;
25570 /* ??? Compares pair with jump/setcc. */
25571 if (ix86_flags_dependent (insn, dep_insn, insn_type))
25572 cost = 0;
25574 /* Floating point stores require the value to be ready one cycle earlier. */
25575 if (insn_type == TYPE_FMOV
25576 && get_attr_memory (insn) == MEMORY_STORE
25577 && !ix86_agi_dependent (dep_insn, insn))
25578 cost += 1;
25579 break;
25581 case PROCESSOR_PENTIUMPRO:
25582 /* INT->FP conversion is expensive. */
25583 if (get_attr_fp_int_src (dep_insn))
25584 cost += 5;
25586 /* There is one extra cycle of latency between an FP op and a store. */
25587 if (insn_type == TYPE_FMOV
25588 && (set = single_set (dep_insn)) != NULL_RTX
25589 && (set2 = single_set (insn)) != NULL_RTX
25590 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
25591 && MEM_P (SET_DEST (set2)))
25592 cost += 1;
25594 memory = get_attr_memory (insn);
25596 /* Show the ability of the reorder buffer to hide the latency of a load by
25597 executing it in parallel with the previous instruction when the
25598 previous instruction is not needed to compute the address. */
25599 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25600 && !ix86_agi_dependent (dep_insn, insn))
25602 /* Claim moves to take one cycle, as the core can issue one load
25603 at a time and the next load can start a cycle later. */
25604 if (dep_insn_type == TYPE_IMOV
25605 || dep_insn_type == TYPE_FMOV)
25606 cost = 1;
25607 else if (cost > 1)
25608 cost--;
25610 break;
25612 case PROCESSOR_K6:
25613 /* The esp dependency is resolved before
25614 the instruction is really finished. */
25615 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25616 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25617 return 1;
25619 /* INT->FP conversion is expensive. */
25620 if (get_attr_fp_int_src (dep_insn))
25621 cost += 5;
25623 memory = get_attr_memory (insn);
25625 /* Show the ability of the reorder buffer to hide the latency of a load by
25626 executing it in parallel with the previous instruction when the
25627 previous instruction is not needed to compute the address. */
25628 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25629 && !ix86_agi_dependent (dep_insn, insn))
25631 /* Claim moves to take one cycle, as the core can issue one load
25632 at a time and the next load can start a cycle later. */
25633 if (dep_insn_type == TYPE_IMOV
25634 || dep_insn_type == TYPE_FMOV)
25635 cost = 1;
25636 else if (cost > 2)
25637 cost -= 2;
25638 else
25639 cost = 1;
25641 break;
25643 case PROCESSOR_AMDFAM10:
25644 case PROCESSOR_BDVER1:
25645 case PROCESSOR_BDVER2:
25646 case PROCESSOR_BDVER3:
25647 case PROCESSOR_BDVER4:
25648 case PROCESSOR_BTVER1:
25649 case PROCESSOR_BTVER2:
25650 case PROCESSOR_GENERIC:
25651 /* The stack engine allows push and pop instructions to execute in parallel. */
25652 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25653 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25654 return 0;
25655 /* FALLTHRU */
25657 case PROCESSOR_ATHLON:
25658 case PROCESSOR_K8:
25659 memory = get_attr_memory (insn);
25661 /* Show the ability of the reorder buffer to hide the latency of a load by
25662 executing it in parallel with the previous instruction when the
25663 previous instruction is not needed to compute the address. */
25664 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25665 && !ix86_agi_dependent (dep_insn, insn))
25667 enum attr_unit unit = get_attr_unit (insn);
25668 int loadcost = 3;
25670 /* Because of the difference between the length of integer and
25671 floating unit pipeline preparation stages, the memory operands
25672 for floating point are cheaper.
25674 ??? For Athlon the difference is most probably 2. */
25675 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
25676 loadcost = 3;
25677 else
25678 loadcost = TARGET_ATHLON ? 2 : 0;
25680 if (cost >= loadcost)
25681 cost -= loadcost;
25682 else
25683 cost = 0;
25685 break;
25687 case PROCESSOR_CORE2:
25688 case PROCESSOR_NEHALEM:
25689 case PROCESSOR_SANDYBRIDGE:
25690 case PROCESSOR_HASWELL:
25691 /* The stack engine allows push and pop instructions to execute in parallel. */
25692 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25693 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25694 return 0;
25696 memory = get_attr_memory (insn);
25698 /* Show the ability of the reorder buffer to hide the latency of a load by
25699 executing it in parallel with the previous instruction when the
25700 previous instruction is not needed to compute the address. */
25701 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25702 && !ix86_agi_dependent (dep_insn, insn))
25704 if (cost >= 4)
25705 cost -= 4;
25706 else
25707 cost = 0;
25709 break;
25711 case PROCESSOR_SILVERMONT:
25712 case PROCESSOR_INTEL:
25713 if (!reload_completed)
25714 return cost;
25716 /* Increase cost of integer loads. */
25717 memory = get_attr_memory (dep_insn);
25718 if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25720 enum attr_unit unit = get_attr_unit (dep_insn);
25721 if (unit == UNIT_INTEGER && cost == 1)
25723 if (memory == MEMORY_LOAD)
25724 cost = 3;
25725 else
25727 /* Increase cost of ld/st for short int types only
25728 because of the store forwarding issue. */
25729 rtx set = single_set (dep_insn);
25730 if (set && (GET_MODE (SET_DEST (set)) == QImode
25731 || GET_MODE (SET_DEST (set)) == HImode))
25733 /* Increase cost of store/load insn if an exact
25734 dependence exists and it is a load insn. */
25735 enum attr_memory insn_memory = get_attr_memory (insn);
25736 if (insn_memory == MEMORY_LOAD
25737 && exact_store_load_dependency (dep_insn, insn))
25738 cost = 3;
25744 default:
25745 break;
25748 return cost;
25751 /* How many alternative schedules to try. This should be as wide as the
25752 scheduling freedom in the DFA, but no wider. Making this value too
25753 large results in extra work for the scheduler. */
25755 static int
25756 ia32_multipass_dfa_lookahead (void)
25758 switch (ix86_tune)
25760 case PROCESSOR_PENTIUM:
25761 return 2;
25763 case PROCESSOR_PENTIUMPRO:
25764 case PROCESSOR_K6:
25765 return 1;
25767 case PROCESSOR_BDVER1:
25768 case PROCESSOR_BDVER2:
25769 case PROCESSOR_BDVER3:
25770 case PROCESSOR_BDVER4:
25771 /* We use lookahead value 4 for BD both before and after reload
25772 schedules. The plan is to have value 8 included for -O3. */
25773 return 4;
25775 case PROCESSOR_CORE2:
25776 case PROCESSOR_NEHALEM:
25777 case PROCESSOR_SANDYBRIDGE:
25778 case PROCESSOR_HASWELL:
25779 case PROCESSOR_BONNELL:
25780 case PROCESSOR_SILVERMONT:
25781 case PROCESSOR_INTEL:
25782 /* Generally, we want haifa-sched:max_issue() to look ahead as far
25783 as the number of instructions that can be executed in a cycle, i.e.,
25784 issue_rate. I wonder why tuning for many CPUs does not do this. */
25785 if (reload_completed)
25786 return ix86_issue_rate ();
25787 /* Don't use lookahead for pre-reload schedule to save compile time. */
25788 return 0;
25790 default:
25791 return 0;
25795 /* Return true if target platform supports macro-fusion. */
25797 static bool
25798 ix86_macro_fusion_p ()
25800 return TARGET_FUSE_CMP_AND_BRANCH;
25803 /* Check whether the current microarchitecture supports macro fusion
25804 for the insn pair "CONDGEN + CONDJMP". Refer to
25805 "Intel Architectures Optimization Reference Manual". */
25807 static bool
25808 ix86_macro_fusion_pair_p (rtx condgen, rtx condjmp)
25810 rtx src, dest;
25811 rtx single_set = single_set (condgen);
25812 enum rtx_code ccode;
25813 rtx compare_set = NULL_RTX, test_if, cond;
25814 rtx alu_set = NULL_RTX, addr = NULL_RTX;
25816 if (get_attr_type (condgen) != TYPE_TEST
25817 && get_attr_type (condgen) != TYPE_ICMP
25818 && get_attr_type (condgen) != TYPE_INCDEC
25819 && get_attr_type (condgen) != TYPE_ALU)
25820 return false;
25822 if (single_set == NULL_RTX
25823 && !TARGET_FUSE_ALU_AND_BRANCH)
25824 return false;
25826 if (single_set != NULL_RTX)
25827 compare_set = single_set;
25828 else
25830 int i;
25831 rtx pat = PATTERN (condgen);
25832 for (i = 0; i < XVECLEN (pat, 0); i++)
25833 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
25835 rtx set_src = SET_SRC (XVECEXP (pat, 0, i));
25836 if (GET_CODE (set_src) == COMPARE)
25837 compare_set = XVECEXP (pat, 0, i);
25838 else
25839 alu_set = XVECEXP (pat, 0, i);
25842 if (compare_set == NULL_RTX)
25843 return false;
25844 src = SET_SRC (compare_set);
25845 if (GET_CODE (src) != COMPARE)
25846 return false;
25848 /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not
25849 supported. */
25850 if ((MEM_P (XEXP (src, 0))
25851 && CONST_INT_P (XEXP (src, 1)))
25852 || (MEM_P (XEXP (src, 1))
25853 && CONST_INT_P (XEXP (src, 0))))
25854 return false;
25856 /* No fusion for RIP-relative address. */
25857 if (MEM_P (XEXP (src, 0)))
25858 addr = XEXP (XEXP (src, 0), 0);
25859 else if (MEM_P (XEXP (src, 1)))
25860 addr = XEXP (XEXP (src, 1), 0);
25862 if (addr) {
25863 ix86_address parts;
25864 int ok = ix86_decompose_address (addr, &parts);
25865 gcc_assert (ok);
25867 if (rip_relative_addr_p (&parts))
25868 return false;
25871 test_if = SET_SRC (pc_set (condjmp));
25872 cond = XEXP (test_if, 0);
25873 ccode = GET_CODE (cond);
25874 /* Check whether the conditional jump uses the Sign or Overflow flags. */
25875 if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
25876 && (ccode == GE
25877 || ccode == GT
25878 || ccode == LE
25879 || ccode == LT))
25880 return false;
25882 /* Return true for TYPE_TEST and TYPE_ICMP. */
25883 if (get_attr_type (condgen) == TYPE_TEST
25884 || get_attr_type (condgen) == TYPE_ICMP)
25885 return true;
25887 /* The following handles the case of macro-fusion for alu + jmp. */
25888 if (!TARGET_FUSE_ALU_AND_BRANCH || !alu_set)
25889 return false;
25891 /* No fusion for alu op with memory destination operand. */
25892 dest = SET_DEST (alu_set);
25893 if (MEM_P (dest))
25894 return false;
25896 /* Macro-fusion for inc/dec + unsigned conditional jump is not
25897 supported. */
25898 if (get_attr_type (condgen) == TYPE_INCDEC
25899 && (ccode == GEU
25900 || ccode == GTU
25901 || ccode == LEU
25902 || ccode == LTU))
25903 return false;
25905 return true;
25908 /* Try to reorder ready list to take advantage of Atom pipelined IMUL
25909 execution. It is applied if
25910 (1) IMUL instruction is on the top of list;
25911 (2) there is exactly one producer of an independent IMUL instruction in
25912 the ready list.
25913 Return index of IMUL producer if it was found and -1 otherwise. */
25914 static int
25915 do_reorder_for_imul (rtx *ready, int n_ready)
25917 rtx insn, set, insn1, insn2;
25918 sd_iterator_def sd_it;
25919 dep_t dep;
25920 int index = -1;
25921 int i;
25923 if (!TARGET_BONNELL)
25924 return index;
25926 /* Check that IMUL instruction is on the top of ready list. */
25927 insn = ready[n_ready - 1];
25928 set = single_set (insn);
25929 if (!set)
25930 return index;
25931 if (!(GET_CODE (SET_SRC (set)) == MULT
25932 && GET_MODE (SET_SRC (set)) == SImode))
25933 return index;
25935 /* Search for producer of independent IMUL instruction. */
25936 for (i = n_ready - 2; i >= 0; i--)
25938 insn = ready[i];
25939 if (!NONDEBUG_INSN_P (insn))
25940 continue;
25941 /* Skip IMUL instruction. */
25942 insn2 = PATTERN (insn);
25943 if (GET_CODE (insn2) == PARALLEL)
25944 insn2 = XVECEXP (insn2, 0, 0);
25945 if (GET_CODE (insn2) == SET
25946 && GET_CODE (SET_SRC (insn2)) == MULT
25947 && GET_MODE (SET_SRC (insn2)) == SImode)
25948 continue;
25950 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
25952 rtx con;
25953 con = DEP_CON (dep);
25954 if (!NONDEBUG_INSN_P (con))
25955 continue;
25956 insn1 = PATTERN (con);
25957 if (GET_CODE (insn1) == PARALLEL)
25958 insn1 = XVECEXP (insn1, 0, 0);
25960 if (GET_CODE (insn1) == SET
25961 && GET_CODE (SET_SRC (insn1)) == MULT
25962 && GET_MODE (SET_SRC (insn1)) == SImode)
25964 sd_iterator_def sd_it1;
25965 dep_t dep1;
25966 /* Check if there is no other dependee for IMUL. */
25967 index = i;
25968 FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
25970 rtx pro;
25971 pro = DEP_PRO (dep1);
25972 if (!NONDEBUG_INSN_P (pro))
25973 continue;
25974 if (pro != insn)
25975 index = -1;
25977 if (index >= 0)
25978 break;
25981 if (index >= 0)
25982 break;
25984 return index;
25987 /* Try to find the best candidate at the top of the ready list if two insns
25988 have the same priority - a candidate is best if its dependees were
25989 scheduled earlier. Applied for Silvermont only.
25990 Return true if top 2 insns must be interchanged. */
25991 static bool
25992 swap_top_of_ready_list (rtx *ready, int n_ready)
25994 rtx top = ready[n_ready - 1];
25995 rtx next = ready[n_ready - 2];
25996 rtx set;
25997 sd_iterator_def sd_it;
25998 dep_t dep;
25999 int clock1 = -1;
26000 int clock2 = -1;
26001 #define INSN_TICK(INSN) (HID (INSN)->tick)
26003 if (!TARGET_SILVERMONT && !TARGET_INTEL)
26004 return false;
26006 if (!NONDEBUG_INSN_P (top))
26007 return false;
26008 if (!NONJUMP_INSN_P (top))
26009 return false;
26010 if (!NONDEBUG_INSN_P (next))
26011 return false;
26012 if (!NONJUMP_INSN_P (next))
26013 return false;
26014 set = single_set (top);
26015 if (!set)
26016 return false;
26017 set = single_set (next);
26018 if (!set)
26019 return false;
26021 if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next))
26023 if (INSN_PRIORITY (top) != INSN_PRIORITY (next))
26024 return false;
26025 /* Determine the winner more precisely. */
26026 FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep)
26028 rtx pro;
26029 pro = DEP_PRO (dep);
26030 if (!NONDEBUG_INSN_P (pro))
26031 continue;
26032 if (INSN_TICK (pro) > clock1)
26033 clock1 = INSN_TICK (pro);
26035 FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep)
26037 rtx pro;
26038 pro = DEP_PRO (dep);
26039 if (!NONDEBUG_INSN_P (pro))
26040 continue;
26041 if (INSN_TICK (pro) > clock2)
26042 clock2 = INSN_TICK (pro);
26045 if (clock1 == clock2)
26047 /* Determine the winner - a load must win. */
26048 enum attr_memory memory1, memory2;
26049 memory1 = get_attr_memory (top);
26050 memory2 = get_attr_memory (next);
26051 if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD)
26052 return true;
26054 return (bool) (clock2 < clock1);
26056 return false;
26057 #undef INSN_TICK
26060 /* Perform possible reordering of the ready list for Atom/Silvermont only.
26061 Return issue rate. */
26062 static int
26063 ix86_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
26064 int clock_var)
26066 int issue_rate = -1;
26067 int n_ready = *pn_ready;
26068 int i;
26069 rtx insn;
26070 int index = -1;
26072 /* Set up issue rate. */
26073 issue_rate = ix86_issue_rate ();
26075 /* Do reordering for BONNELL/SILVERMONT only. */
26076 if (!TARGET_BONNELL && !TARGET_SILVERMONT && !TARGET_INTEL)
26077 return issue_rate;
26079 /* Nothing to do if ready list contains only 1 instruction. */
26080 if (n_ready <= 1)
26081 return issue_rate;
26083 /* Do reordering for the post-reload scheduler only. */
26084 if (!reload_completed)
26085 return issue_rate;
26087 if ((index = do_reorder_for_imul (ready, n_ready)) >= 0)
26089 if (sched_verbose > 1)
26090 fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n",
26091 INSN_UID (ready[index]));
26093 /* Put IMUL producer (ready[index]) at the top of ready list. */
26094 insn = ready[index];
26095 for (i = index; i < n_ready - 1; i++)
26096 ready[i] = ready[i + 1];
26097 ready[n_ready - 1] = insn;
26098 return issue_rate;
26100 if (clock_var != 0 && swap_top_of_ready_list (ready, n_ready))
26102 if (sched_verbose > 1)
26103 fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n",
26104 INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2]));
26105 /* Swap 2 top elements of ready list. */
26106 insn = ready[n_ready - 1];
26107 ready[n_ready - 1] = ready[n_ready - 2];
26108 ready[n_ready - 2] = insn;
26110 return issue_rate;
26113 static bool
26114 ix86_class_likely_spilled_p (reg_class_t);
26116 /* Return true if the lhs of the insn is a HW function argument register, and
26117 set is_spilled to true if it is a likely spilled HW register. */
26118 static bool
26119 insn_is_function_arg (rtx insn, bool* is_spilled)
26121 rtx dst;
26123 if (!NONDEBUG_INSN_P (insn))
26124 return false;
26125 /* Call instructions are not movable, ignore them. */
26126 if (CALL_P (insn))
26127 return false;
26128 insn = PATTERN (insn);
26129 if (GET_CODE (insn) == PARALLEL)
26130 insn = XVECEXP (insn, 0, 0);
26131 if (GET_CODE (insn) != SET)
26132 return false;
26133 dst = SET_DEST (insn);
26134 if (REG_P (dst) && HARD_REGISTER_P (dst)
26135 && ix86_function_arg_regno_p (REGNO (dst)))
26137 /* Is it a likely spilled HW register? */
26138 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
26139 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
26140 *is_spilled = true;
26141 return true;
26143 return false;
26146 /* Add output dependencies for a chain of adjacent function arguments, but
26147 only if there is a move to a likely spilled HW register. Return the first
26148 argument if at least one dependence was added, or NULL otherwise. */
26149 static rtx
26150 add_parameter_dependencies (rtx call, rtx head)
26152 rtx insn;
26153 rtx last = call;
26154 rtx first_arg = NULL;
26155 bool is_spilled = false;
26157 head = PREV_INSN (head);
26159 /* Find the argument-passing instruction nearest to the call. */
26160 while (true)
26162 last = PREV_INSN (last);
26163 if (last == head)
26164 return NULL;
26165 if (!NONDEBUG_INSN_P (last))
26166 continue;
26167 if (insn_is_function_arg (last, &is_spilled))
26168 break;
26169 return NULL;
26172 first_arg = last;
26173 while (true)
26175 insn = PREV_INSN (last);
26176 if (!INSN_P (insn))
26177 break;
26178 if (insn == head)
26179 break;
26180 if (!NONDEBUG_INSN_P (insn))
26182 last = insn;
26183 continue;
26185 if (insn_is_function_arg (insn, &is_spilled))
26187 /* Add an output dependence between two function arguments if the chain
26188 of output arguments contains likely spilled HW registers. */
26189 if (is_spilled)
26190 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26191 first_arg = last = insn;
26193 else
26194 break;
26196 if (!is_spilled)
26197 return NULL;
26198 return first_arg;
26201 /* Add output or anti dependency from insn to first_arg to restrict its code
26202 motion. */
26203 static void
26204 avoid_func_arg_motion (rtx first_arg, rtx insn)
26206 rtx set;
26207 rtx tmp;
26209 set = single_set (insn);
26210 if (!set)
26211 return;
26212 tmp = SET_DEST (set);
26213 if (REG_P (tmp))
26215 /* Add output dependency to the first function argument. */
26216 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26217 return;
26219 /* Add anti dependency. */
26220 add_dependence (first_arg, insn, REG_DEP_ANTI);
26223 /* Avoid cross block motion of a function argument by adding a dependency
26224 from the first non-jump instruction in bb. */
26225 static void
26226 add_dependee_for_func_arg (rtx arg, basic_block bb)
26228 rtx insn = BB_END (bb);
26230 while (insn)
26232 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
26234 rtx set = single_set (insn);
26235 if (set)
26237 avoid_func_arg_motion (arg, insn);
26238 return;
26241 if (insn == BB_HEAD (bb))
26242 return;
26243 insn = PREV_INSN (insn);
26247 /* Hook for pre-reload schedule - avoid motion of function arguments
26248 passed in likely spilled HW registers. */
26249 static void
26250 ix86_dependencies_evaluation_hook (rtx head, rtx tail)
26252 rtx insn;
26253 rtx first_arg = NULL;
26254 if (reload_completed)
26255 return;
26256 while (head != tail && DEBUG_INSN_P (head))
26257 head = NEXT_INSN (head);
26258 for (insn = tail; insn != head; insn = PREV_INSN (insn))
26259 if (INSN_P (insn) && CALL_P (insn))
26261 first_arg = add_parameter_dependencies (insn, head);
26262 if (first_arg)
26264 /* Add a dependee for the first argument to predecessors, but only
26265 if the region contains more than one block. */
26266 basic_block bb = BLOCK_FOR_INSN (insn);
26267 int rgn = CONTAINING_RGN (bb->index);
26268 int nr_blks = RGN_NR_BLOCKS (rgn);
26269 /* Skip trivial regions and region head blocks that can have
26270 predecessors outside of region. */
26271 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
26273 edge e;
26274 edge_iterator ei;
26276 /* Regions are SCCs with the exception of selective
26277 scheduling with pipelining of outer blocks enabled.
26278 So also check that immediate predecessors of a non-head
26279 block are in the same region. */
26280 FOR_EACH_EDGE (e, ei, bb->preds)
26282 /* Avoid creating loop-carried dependencies by
26283 using topological ordering in the region. */
26284 if (rgn == CONTAINING_RGN (e->src->index)
26285 && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
26286 add_dependee_for_func_arg (first_arg, e->src);
26289 insn = first_arg;
26290 if (insn == head)
26291 break;
26294 else if (first_arg)
26295 avoid_func_arg_motion (first_arg, insn);
26298 /* Hook for pre-reload schedule - set priority of moves from likely spilled
26299 HW registers to maximum, to schedule them as soon as possible. These are
26300 moves from function argument registers at the top of the function entry
26301 and moves from function return value registers after call. */
26302 static int
26303 ix86_adjust_priority (rtx insn, int priority)
26305 rtx set;
26307 if (reload_completed)
26308 return priority;
26310 if (!NONDEBUG_INSN_P (insn))
26311 return priority;
26313 set = single_set (insn);
26314 if (set)
26316 rtx tmp = SET_SRC (set);
26317 if (REG_P (tmp)
26318 && HARD_REGISTER_P (tmp)
26319 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
26320 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
26321 return current_sched_info->sched_max_insns_priority;
26324 return priority;
26327 /* Model decoder of Core 2/i7.
26328 The hooks below for multipass scheduling (see haifa-sched.c:max_issue)
26329 track the instruction fetch block boundaries and make sure that long
26330 (9+ bytes) instructions are assigned to D0. */
26332 /* Maximum length of an insn that can be handled by
26333 a secondary decoder unit. '8' for Core 2/i7. */
26334 static int core2i7_secondary_decoder_max_insn_size;
26336 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
26337 '16' for Core 2/i7. */
26338 static int core2i7_ifetch_block_size;
26340 /* Maximum number of instructions decoder can handle per cycle.
26341 '6' for Core 2/i7. */
26342 static int core2i7_ifetch_block_max_insns;
26344 typedef struct ix86_first_cycle_multipass_data_ *
26345 ix86_first_cycle_multipass_data_t;
26346 typedef const struct ix86_first_cycle_multipass_data_ *
26347 const_ix86_first_cycle_multipass_data_t;
26349 /* A variable to store target state across calls to max_issue within
26350 one cycle. */
26351 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
26352 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
26354 /* Initialize DATA. */
26355 static void
26356 core2i7_first_cycle_multipass_init (void *_data)
26358 ix86_first_cycle_multipass_data_t data
26359 = (ix86_first_cycle_multipass_data_t) _data;
26361 data->ifetch_block_len = 0;
26362 data->ifetch_block_n_insns = 0;
26363 data->ready_try_change = NULL;
26364 data->ready_try_change_size = 0;
26367 /* Advancing the cycle; reset ifetch block counts. */
26368 static void
26369 core2i7_dfa_post_advance_cycle (void)
26371 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
26373 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
26375 data->ifetch_block_len = 0;
26376 data->ifetch_block_n_insns = 0;
26379 static int min_insn_size (rtx);
26381 /* Filter out insns from ready_try that the core will not be able to issue
26382 on the current cycle due to decoder restrictions. */
26383 static void
26384 core2i7_first_cycle_multipass_filter_ready_try
26385 (const_ix86_first_cycle_multipass_data_t data,
26386 signed char *ready_try, int n_ready, bool first_cycle_insn_p)
26388 while (n_ready--)
26390 rtx insn;
26391 int insn_size;
26393 if (ready_try[n_ready])
26394 continue;
26396 insn = get_ready_element (n_ready);
26397 insn_size = min_insn_size (insn);
26399 if (/* If this is a too long an insn for a secondary decoder ... */
26400 (!first_cycle_insn_p
26401 && insn_size > core2i7_secondary_decoder_max_insn_size)
26402 /* ... or it would not fit into the ifetch block ... */
26403 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
26404 /* ... or the decoder is full already ... */
26405 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
26406 /* ... mask the insn out. */
26408 ready_try[n_ready] = 1;
26410 if (data->ready_try_change)
26411 bitmap_set_bit (data->ready_try_change, n_ready);
26416 /* Prepare for a new round of multipass lookahead scheduling. */
26417 static void
26418 core2i7_first_cycle_multipass_begin (void *_data,
26419 signed char *ready_try, int n_ready,
26420 bool first_cycle_insn_p)
26422 ix86_first_cycle_multipass_data_t data
26423 = (ix86_first_cycle_multipass_data_t) _data;
26424 const_ix86_first_cycle_multipass_data_t prev_data
26425 = ix86_first_cycle_multipass_data;
26427 /* Restore the state from the end of the previous round. */
26428 data->ifetch_block_len = prev_data->ifetch_block_len;
26429 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
26431 /* Filter instructions that cannot be issued on current cycle due to
26432 decoder restrictions. */
26433 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
26434 first_cycle_insn_p);
26437 /* INSN is being issued in current solution. Account for its impact on
26438 the decoder model. */
26439 static void
26440 core2i7_first_cycle_multipass_issue (void *_data,
26441 signed char *ready_try, int n_ready,
26442 rtx insn, const void *_prev_data)
26444 ix86_first_cycle_multipass_data_t data
26445 = (ix86_first_cycle_multipass_data_t) _data;
26446 const_ix86_first_cycle_multipass_data_t prev_data
26447 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
26449 int insn_size = min_insn_size (insn);
26451 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
26452 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
26453 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
26454 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
26456 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
26457 if (!data->ready_try_change)
26459 data->ready_try_change = sbitmap_alloc (n_ready);
26460 data->ready_try_change_size = n_ready;
26462 else if (data->ready_try_change_size < n_ready)
26464 data->ready_try_change = sbitmap_resize (data->ready_try_change,
26465 n_ready, 0);
26466 data->ready_try_change_size = n_ready;
26468 bitmap_clear (data->ready_try_change);
26470 /* Filter out insns from ready_try that the core will not be able to issue
26471 on the current cycle due to decoder restrictions. */
26472 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
26473 false);
26476 /* Revert the effect on ready_try. */
26477 static void
26478 core2i7_first_cycle_multipass_backtrack (const void *_data,
26479 signed char *ready_try,
26480 int n_ready ATTRIBUTE_UNUSED)
26482 const_ix86_first_cycle_multipass_data_t data
26483 = (const_ix86_first_cycle_multipass_data_t) _data;
26484 unsigned int i = 0;
26485 sbitmap_iterator sbi;
26487 gcc_assert (bitmap_last_set_bit (data->ready_try_change) < n_ready);
26488 EXECUTE_IF_SET_IN_BITMAP (data->ready_try_change, 0, i, sbi)
26490 ready_try[i] = 0;
26494 /* Save the result of multipass lookahead scheduling for the next round. */
26495 static void
26496 core2i7_first_cycle_multipass_end (const void *_data)
26498 const_ix86_first_cycle_multipass_data_t data
26499 = (const_ix86_first_cycle_multipass_data_t) _data;
26500 ix86_first_cycle_multipass_data_t next_data
26501 = ix86_first_cycle_multipass_data;
26503 if (data != NULL)
26505 next_data->ifetch_block_len = data->ifetch_block_len;
26506 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
26510 /* Deallocate target data. */
26511 static void
26512 core2i7_first_cycle_multipass_fini (void *_data)
26514 ix86_first_cycle_multipass_data_t data
26515 = (ix86_first_cycle_multipass_data_t) _data;
26517 if (data->ready_try_change)
26519 sbitmap_free (data->ready_try_change);
26520 data->ready_try_change = NULL;
26521 data->ready_try_change_size = 0;
26525 /* Prepare for scheduling pass. */
26526 static void
26527 ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
26528 int verbose ATTRIBUTE_UNUSED,
26529 int max_uid ATTRIBUTE_UNUSED)
26531 /* Install scheduling hooks for current CPU. Some of these hooks are used
26532 in time-critical parts of the scheduler, so we only set them up when
26533 they are actually used. */
26534 switch (ix86_tune)
26536 case PROCESSOR_CORE2:
26537 case PROCESSOR_NEHALEM:
26538 case PROCESSOR_SANDYBRIDGE:
26539 case PROCESSOR_HASWELL:
26540 /* Do not perform multipass scheduling for the pre-reload scheduling
26541 pass, to save compile time. */
26542 if (reload_completed)
26544 targetm.sched.dfa_post_advance_cycle
26545 = core2i7_dfa_post_advance_cycle;
26546 targetm.sched.first_cycle_multipass_init
26547 = core2i7_first_cycle_multipass_init;
26548 targetm.sched.first_cycle_multipass_begin
26549 = core2i7_first_cycle_multipass_begin;
26550 targetm.sched.first_cycle_multipass_issue
26551 = core2i7_first_cycle_multipass_issue;
26552 targetm.sched.first_cycle_multipass_backtrack
26553 = core2i7_first_cycle_multipass_backtrack;
26554 targetm.sched.first_cycle_multipass_end
26555 = core2i7_first_cycle_multipass_end;
26556 targetm.sched.first_cycle_multipass_fini
26557 = core2i7_first_cycle_multipass_fini;
26559 /* Set decoder parameters. */
26560 core2i7_secondary_decoder_max_insn_size = 8;
26561 core2i7_ifetch_block_size = 16;
26562 core2i7_ifetch_block_max_insns = 6;
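		/* An illustrative consequence of the numbers above (they are
		   heuristic tuning values, not architectural guarantees): once
		   three 4-byte insns have been issued in a cycle (12 bytes,
		   3 insns), the filter above rejects a 5-byte insn because
		   12 + 5 > 16, but still allows another 4-byte one
		   (16 bytes, 4 insns, each within the 8-byte secondary-decoder
		   limit).  */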
26563 break;
26565 /* ... Fall through ... */
26566 default:
26567 targetm.sched.dfa_post_advance_cycle = NULL;
26568 targetm.sched.first_cycle_multipass_init = NULL;
26569 targetm.sched.first_cycle_multipass_begin = NULL;
26570 targetm.sched.first_cycle_multipass_issue = NULL;
26571 targetm.sched.first_cycle_multipass_backtrack = NULL;
26572 targetm.sched.first_cycle_multipass_end = NULL;
26573 targetm.sched.first_cycle_multipass_fini = NULL;
26574 break;
26579 /* Compute the alignment given to a constant that is being placed in memory.
26580 EXP is the constant and ALIGN is the alignment that the object would
26581 ordinarily have.
26582 The value of this function is used instead of that alignment to align
26583 the object. */
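/* For example, under this function a constant of type double gets at least
   64-bit alignment even at -Os, and (when not optimizing for size) a string
   constant whose TREE_STRING_LENGTH is at least 31 is raised to word
   alignment, presumably so that block operations on long literals stay
   aligned.  */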
26586 ix86_constant_alignment (tree exp, int align)
26588 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
26589 || TREE_CODE (exp) == INTEGER_CST)
26591 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
26592 return 64;
26593 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
26594 return 128;
26596 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
26597 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
26598 return BITS_PER_WORD;
26600 return align;
26603 /* Compute the alignment for a static variable.
26604 TYPE is the data type, and ALIGN is the alignment that
26605 the object would ordinarily have. The value of this function is used
26606 instead of that alignment to align the object. */
26609 ix86_data_alignment (tree type, int align, bool opt)
26611 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
26612 for symbols from other compilation units or symbols that don't need
26613 to bind locally. In order to preserve some ABI compatibility with
26614 those compilers, ensure we don't decrease alignment from what we
26615 used to assume. */
26617 int max_align_compat
26618 = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
26620 /* A data structure equal to or greater than the size of a cache line
26621 (64 bytes on the Pentium 4 and other recent Intel processors, including
26622 processors based on the Intel Core microarchitecture) should be aligned
26623 so that its base address is a multiple of the cache line size. */
26625 int max_align
26626 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
26628 if (max_align < BITS_PER_WORD)
26629 max_align = BITS_PER_WORD;
26631 if (opt
26632 && AGGREGATE_TYPE_P (type)
26633 && TYPE_SIZE (type)
26634 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
26636 if (wi::geu_p (TYPE_SIZE (type), max_align_compat)
26637 && align < max_align_compat)
26638 align = max_align_compat;
26639 if (wi::geu_p (TYPE_SIZE (type), max_align)
26640 && align < max_align)
26641 align = max_align;
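      /* A worked example of the two tests above (assuming a 64-byte
	 prefetch_block in the active tuning, so max_align is 512 bits):
	 with optimization enabled, a 40-byte global struct is raised only
	 to the 256-bit GCC 4.8 compatibility alignment, while a 100-byte
	 struct also satisfies the max_align test and ends up 512-bit
	 (cache-line) aligned.  */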
26644 /* The x86-64 ABI requires arrays of 16 bytes or more to be aligned
26645 to a 16-byte boundary. */
26646 if (TARGET_64BIT)
26648 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
26649 && TYPE_SIZE (type)
26650 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
26651 && wi::geu_p (TYPE_SIZE (type), 128)
26652 && align < 128)
26653 return 128;
26656 if (!opt)
26657 return align;
26659 if (TREE_CODE (type) == ARRAY_TYPE)
26661 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
26662 return 64;
26663 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
26664 return 128;
26666 else if (TREE_CODE (type) == COMPLEX_TYPE)
26669 if (TYPE_MODE (type) == DCmode && align < 64)
26670 return 64;
26671 if ((TYPE_MODE (type) == XCmode
26672 || TYPE_MODE (type) == TCmode) && align < 128)
26673 return 128;
26675 else if ((TREE_CODE (type) == RECORD_TYPE
26676 || TREE_CODE (type) == UNION_TYPE
26677 || TREE_CODE (type) == QUAL_UNION_TYPE)
26678 && TYPE_FIELDS (type))
26680 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
26681 return 64;
26682 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
26683 return 128;
26685 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
26686 || TREE_CODE (type) == INTEGER_TYPE)
26688 if (TYPE_MODE (type) == DFmode && align < 64)
26689 return 64;
26690 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
26691 return 128;
26694 return align;
26697 /* Compute the alignment for a local variable or a stack slot. EXP is
26698 the data type or decl itself, MODE is the widest mode available and
26699 ALIGN is the alignment that the object would ordinarily have. The
26700 value of this macro is used instead of that alignment to align the
26701 object. */
26703 unsigned int
26704 ix86_local_alignment (tree exp, enum machine_mode mode,
26705 unsigned int align)
26707 tree type, decl;
26709 if (exp && DECL_P (exp))
26711 type = TREE_TYPE (exp);
26712 decl = exp;
26714 else
26716 type = exp;
26717 decl = NULL;
26720 /* Don't do dynamic stack realignment for long long objects with
26721 -mpreferred-stack-boundary=2. */
26722 if (!TARGET_64BIT
26723 && align == 64
26724 && ix86_preferred_stack_boundary < 64
26725 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
26726 && (!type || !TYPE_USER_ALIGN (type))
26727 && (!decl || !DECL_USER_ALIGN (decl)))
26728 align = 32;
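  /* For instance, with -m32 -mpreferred-stack-boundary=2 a local "long long"
     that would normally request 64-bit alignment is knocked down to 32 bits
     here (unless the user asked for the alignment explicitly), so the frame
     does not have to be dynamically realigned just for it.  */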
26730 /* If TYPE is NULL, we are allocating a stack slot for a caller-save
26731 register in MODE. We will return the larger alignment of XF
26732 and DF. */
26733 if (!type)
26735 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
26736 align = GET_MODE_ALIGNMENT (DFmode);
26737 return align;
26740 /* The x86-64 ABI requires arrays of 16 bytes or more to be aligned
26741 to a 16-byte boundary. The exact wording is:
26743 An array uses the same alignment as its elements, except that a local or
26744 global array variable of length at least 16 bytes or
26745 a C99 variable-length array variable always has alignment of at least 16 bytes.
26747 This was added to allow the use of aligned SSE instructions on arrays. The
26748 rule is meant for static storage (where the compiler cannot do the analysis
26749 by itself). We follow it for automatic variables only when convenient:
26750 we fully control everything in the function being compiled, and functions
26751 from other units cannot rely on the alignment.
26753 Exclude the va_list type. It is the common case of a local array where
26754 we cannot benefit from the alignment.
26756 TODO: Probably one should optimize for size only when the variable does not escape. */
26757 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
26758 && TARGET_SSE)
26760 if (AGGREGATE_TYPE_P (type)
26761 && (va_list_type_node == NULL_TREE
26762 || (TYPE_MAIN_VARIANT (type)
26763 != TYPE_MAIN_VARIANT (va_list_type_node)))
26764 && TYPE_SIZE (type)
26765 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
26766 && wi::geu_p (TYPE_SIZE (type), 16)
26767 && align < 128)
26768 return 128;
26770 if (TREE_CODE (type) == ARRAY_TYPE)
26772 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
26773 return 64;
26774 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
26775 return 128;
26777 else if (TREE_CODE (type) == COMPLEX_TYPE)
26779 if (TYPE_MODE (type) == DCmode && align < 64)
26780 return 64;
26781 if ((TYPE_MODE (type) == XCmode
26782 || TYPE_MODE (type) == TCmode) && align < 128)
26783 return 128;
26785 else if ((TREE_CODE (type) == RECORD_TYPE
26786 || TREE_CODE (type) == UNION_TYPE
26787 || TREE_CODE (type) == QUAL_UNION_TYPE)
26788 && TYPE_FIELDS (type))
26790 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
26791 return 64;
26792 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
26793 return 128;
26795 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
26796 || TREE_CODE (type) == INTEGER_TYPE)
26799 if (TYPE_MODE (type) == DFmode && align < 64)
26800 return 64;
26801 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
26802 return 128;
26804 return align;
26807 /* Compute the minimum required alignment for dynamic stack realignment
26808 purposes for a local variable, parameter or a stack slot. EXP is
26809 the data type or decl itself, MODE is its mode and ALIGN is the
26810 alignment that the object would ordinarily have. */
26812 unsigned int
26813 ix86_minimum_alignment (tree exp, enum machine_mode mode,
26814 unsigned int align)
26816 tree type, decl;
26818 if (exp && DECL_P (exp))
26820 type = TREE_TYPE (exp);
26821 decl = exp;
26823 else
26825 type = exp;
26826 decl = NULL;
26829 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
26830 return align;
26832 /* Don't do dynamic stack realignment for long long objects with
26833 -mpreferred-stack-boundary=2. */
26834 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
26835 && (!type || !TYPE_USER_ALIGN (type))
26836 && (!decl || !DECL_USER_ALIGN (decl)))
26837 return 32;
26839 return align;
26842 /* Find a location for the static chain incoming to a nested function.
26843 This is a register, unless all free registers are used by arguments. */
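/* Summary of the choices made below: in 64-bit mode the static chain is
   always R10; in 32-bit mode it is ECX by default and EAX for fastcall and
   thiscall functions.  For regparm(3) functions the chain is passed on the
   stack by the trampoline; direct calls instead load it into ESI and target
   an alternate entry point that pushes ESI.  */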
26845 static rtx
26846 ix86_static_chain (const_tree fndecl, bool incoming_p)
26848 unsigned regno;
26850 if (!DECL_STATIC_CHAIN (fndecl))
26851 return NULL;
26853 if (TARGET_64BIT)
26855 /* We always use R10 in 64-bit mode. */
26856 regno = R10_REG;
26858 else
26860 tree fntype;
26861 unsigned int ccvt;
26863 /* By default in 32-bit mode we use ECX to pass the static chain. */
26864 regno = CX_REG;
26866 fntype = TREE_TYPE (fndecl);
26867 ccvt = ix86_get_callcvt (fntype);
26868 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
26870 /* Fastcall functions use ecx/edx for arguments, which leaves
26871 us with EAX for the static chain.
26872 Thiscall functions use ecx for arguments, which also
26873 leaves us with EAX for the static chain. */
26874 regno = AX_REG;
26876 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
26878 /* Thiscall functions use ecx for arguments, which leaves
26879 us with EAX and EDX for the static chain.
26880 We use EAX for ABI compatibility. */
26881 regno = AX_REG;
26883 else if (ix86_function_regparm (fntype, fndecl) == 3)
26885 /* For regparm 3, we have no free call-clobbered registers in
26886 which to store the static chain. In order to implement this,
26887 we have the trampoline push the static chain to the stack.
26888 However, we can't push a value below the return address when
26889 we call the nested function directly, so we have to use an
26890 alternate entry point. For this we use ESI, and have the
26891 alternate entry point push ESI, so that things appear the
26892 same once we're executing the nested function. */
26893 if (incoming_p)
26895 if (fndecl == current_function_decl)
26896 ix86_static_chain_on_stack = true;
26897 return gen_frame_mem (SImode,
26898 plus_constant (Pmode,
26899 arg_pointer_rtx, -8));
26901 regno = SI_REG;
26905 return gen_rtx_REG (Pmode, regno);
26908 /* Emit RTL insns to initialize the variable parts of a trampoline.
26909 FNDECL is the decl of the target address; M_TRAMP is a MEM for
26910 the trampoline, and CHAIN_VALUE is an RTX for the static chain
26911 to be passed to the target function. */
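/* As an illustration (not normative; offsets assume the movabs forms are
   chosen), the 64-bit trampoline emitted below is laid out as:

     offset  bytes                    insn
        0    49 bb <8-byte fnaddr>    movabs $fnaddr, %r11
       10    49 ba <8-byte chain>     movabs $chain_value, %r10
       20    49 ff e3 90              jmp *%r11 ; nop (store padding)

   The 32-bit variant instead emits a 1-byte mov-to-register or push opcode
   plus the 32-bit static chain value (5 bytes), followed by a 5-byte rel32
   jmp to the target.  */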
26913 static void
26914 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
26916 rtx mem, fnaddr;
26917 int opcode;
26918 int offset = 0;
26920 fnaddr = XEXP (DECL_RTL (fndecl), 0);
26922 if (TARGET_64BIT)
26924 int size;
26926 /* Load the function address into r11. Try to load the address using
26927 the shorter movl instead of movabs. We may want to support
26928 movq for kernel mode, but the kernel does not use trampolines at
26929 the moment. FNADDR is a 32-bit address and may not be in
26930 DImode when ptr_mode == SImode. Always use movl in this
26931 case. */
26932 if (ptr_mode == SImode
26933 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
26935 fnaddr = copy_addr_to_reg (fnaddr);
26937 mem = adjust_address (m_tramp, HImode, offset);
26938 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
26940 mem = adjust_address (m_tramp, SImode, offset + 2);
26941 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
26942 offset += 6;
26944 else
26946 mem = adjust_address (m_tramp, HImode, offset);
26947 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
26949 mem = adjust_address (m_tramp, DImode, offset + 2);
26950 emit_move_insn (mem, fnaddr);
26951 offset += 10;
26954 /* Load the static chain into r10 using movabs. Use the shorter movl
26955 instead of movabs when ptr_mode == SImode. */
26956 if (ptr_mode == SImode)
26958 opcode = 0xba41;
26959 size = 6;
26961 else
26963 opcode = 0xba49;
26964 size = 10;
26967 mem = adjust_address (m_tramp, HImode, offset);
26968 emit_move_insn (mem, gen_int_mode (opcode, HImode));
26970 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
26971 emit_move_insn (mem, chain_value);
26972 offset += size;
26974 /* Jump to r11; the last (unused) byte is a nop, only there to
26975 pad the write out to a single 32-bit store. */
26976 mem = adjust_address (m_tramp, SImode, offset);
26977 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
26978 offset += 4;
26980 else
26982 rtx disp, chain;
26984 /* Depending on the static chain location, either load a register
26985 with a constant, or push the constant to the stack. All of the
26986 instructions are the same size. */
26987 chain = ix86_static_chain (fndecl, true);
26988 if (REG_P (chain))
26990 switch (REGNO (chain))
26992 case AX_REG:
26993 opcode = 0xb8; break;
26994 case CX_REG:
26995 opcode = 0xb9; break;
26996 default:
26997 gcc_unreachable ();
27000 else
27001 opcode = 0x68;
27003 mem = adjust_address (m_tramp, QImode, offset);
27004 emit_move_insn (mem, gen_int_mode (opcode, QImode));
27006 mem = adjust_address (m_tramp, SImode, offset + 1);
27007 emit_move_insn (mem, chain_value);
27008 offset += 5;
27010 mem = adjust_address (m_tramp, QImode, offset);
27011 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
27013 mem = adjust_address (m_tramp, SImode, offset + 1);
27015 /* Compute the offset from the end of the jmp to the target function.
27016 In the case where the trampoline stores the static chain on
27017 the stack, we need to skip the first insn which pushes the
27018 (call-saved) register static chain; this push is 1 byte. */
27019 offset += 5;
27020 disp = expand_binop (SImode, sub_optab, fnaddr,
27021 plus_constant (Pmode, XEXP (m_tramp, 0),
27022 offset - (MEM_P (chain) ? 1 : 0)),
27023 NULL_RTX, 1, OPTAB_DIRECT);
27024 emit_move_insn (mem, disp);
27027 gcc_assert (offset <= TRAMPOLINE_SIZE);
27029 #ifdef HAVE_ENABLE_EXECUTE_STACK
27030 #ifdef CHECK_EXECUTE_STACK_ENABLED
27031 if (CHECK_EXECUTE_STACK_ENABLED)
27032 #endif
27033 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
27034 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
27035 #endif
27038 /* The following file contains several enumerations and data structures
27039 built from the definitions in i386-builtin-types.def. */
27041 #include "i386-builtin-types.inc"
27043 /* Table for the ix86 builtin non-function types. */
27044 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
27046 /* Retrieve an element from the above table, building some of
27047 the types lazily. */
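/* The tcode space is laid out by i386-builtin-types.inc as: primitive types
   first, then vector types, then pointer types, then const-pointer types.
   The subtractions below recover an index into the matching side table
   (ix86_builtin_type_vect_base/_vect_mode for vectors,
   ix86_builtin_type_ptr_base for both flavours of pointer).  */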
27049 static tree
27050 ix86_get_builtin_type (enum ix86_builtin_type tcode)
27052 unsigned int index;
27053 tree type, itype;
27055 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
27057 type = ix86_builtin_type_tab[(int) tcode];
27058 if (type != NULL)
27059 return type;
27061 gcc_assert (tcode > IX86_BT_LAST_PRIM);
27062 if (tcode <= IX86_BT_LAST_VECT)
27064 enum machine_mode mode;
27066 index = tcode - IX86_BT_LAST_PRIM - 1;
27067 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
27068 mode = ix86_builtin_type_vect_mode[index];
27070 type = build_vector_type_for_mode (itype, mode);
27072 else
27074 int quals;
27076 index = tcode - IX86_BT_LAST_VECT - 1;
27077 if (tcode <= IX86_BT_LAST_PTR)
27078 quals = TYPE_UNQUALIFIED;
27079 else
27080 quals = TYPE_QUAL_CONST;
27082 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
27083 if (quals != TYPE_UNQUALIFIED)
27084 itype = build_qualified_type (itype, quals);
27086 type = build_pointer_type (itype);
27089 ix86_builtin_type_tab[(int) tcode] = type;
27090 return type;
27093 /* Table for the ix86 builtin function types. */
27094 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
27096 /* Retrieve an element from the above table, building some of
27097 the types lazily. */
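/* For the non-alias codes, ix86_builtin_func_start[tcode] indexes into
   ix86_builtin_func_args: the entry at that start position is the return
   type and the entries up to the next function's start are the argument
   types.  The loop below walks them backwards so that the resulting
   TREE_LIST (terminated by void_list_node) comes out in source order.
   Alias codes simply share the tree built for the function type they
   alias.  */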
27099 static tree
27100 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
27102 tree type;
27104 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
27106 type = ix86_builtin_func_type_tab[(int) tcode];
27107 if (type != NULL)
27108 return type;
27110 if (tcode <= IX86_BT_LAST_FUNC)
27112 unsigned start = ix86_builtin_func_start[(int) tcode];
27113 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
27114 tree rtype, atype, args = void_list_node;
27115 unsigned i;
27117 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
27118 for (i = after - 1; i > start; --i)
27120 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
27121 args = tree_cons (NULL, atype, args);
27124 type = build_function_type (rtype, args);
27126 else
27128 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
27129 enum ix86_builtin_func_type icode;
27131 icode = ix86_builtin_func_alias_base[index];
27132 type = ix86_get_builtin_func_type (icode);
27135 ix86_builtin_func_type_tab[(int) tcode] = type;
27136 return type;
27140 /* Codes for all the SSE/MMX builtins. */
27141 enum ix86_builtins
27143 IX86_BUILTIN_ADDPS,
27144 IX86_BUILTIN_ADDSS,
27145 IX86_BUILTIN_DIVPS,
27146 IX86_BUILTIN_DIVSS,
27147 IX86_BUILTIN_MULPS,
27148 IX86_BUILTIN_MULSS,
27149 IX86_BUILTIN_SUBPS,
27150 IX86_BUILTIN_SUBSS,
27152 IX86_BUILTIN_CMPEQPS,
27153 IX86_BUILTIN_CMPLTPS,
27154 IX86_BUILTIN_CMPLEPS,
27155 IX86_BUILTIN_CMPGTPS,
27156 IX86_BUILTIN_CMPGEPS,
27157 IX86_BUILTIN_CMPNEQPS,
27158 IX86_BUILTIN_CMPNLTPS,
27159 IX86_BUILTIN_CMPNLEPS,
27160 IX86_BUILTIN_CMPNGTPS,
27161 IX86_BUILTIN_CMPNGEPS,
27162 IX86_BUILTIN_CMPORDPS,
27163 IX86_BUILTIN_CMPUNORDPS,
27164 IX86_BUILTIN_CMPEQSS,
27165 IX86_BUILTIN_CMPLTSS,
27166 IX86_BUILTIN_CMPLESS,
27167 IX86_BUILTIN_CMPNEQSS,
27168 IX86_BUILTIN_CMPNLTSS,
27169 IX86_BUILTIN_CMPNLESS,
27170 IX86_BUILTIN_CMPORDSS,
27171 IX86_BUILTIN_CMPUNORDSS,
27173 IX86_BUILTIN_COMIEQSS,
27174 IX86_BUILTIN_COMILTSS,
27175 IX86_BUILTIN_COMILESS,
27176 IX86_BUILTIN_COMIGTSS,
27177 IX86_BUILTIN_COMIGESS,
27178 IX86_BUILTIN_COMINEQSS,
27179 IX86_BUILTIN_UCOMIEQSS,
27180 IX86_BUILTIN_UCOMILTSS,
27181 IX86_BUILTIN_UCOMILESS,
27182 IX86_BUILTIN_UCOMIGTSS,
27183 IX86_BUILTIN_UCOMIGESS,
27184 IX86_BUILTIN_UCOMINEQSS,
27186 IX86_BUILTIN_CVTPI2PS,
27187 IX86_BUILTIN_CVTPS2PI,
27188 IX86_BUILTIN_CVTSI2SS,
27189 IX86_BUILTIN_CVTSI642SS,
27190 IX86_BUILTIN_CVTSS2SI,
27191 IX86_BUILTIN_CVTSS2SI64,
27192 IX86_BUILTIN_CVTTPS2PI,
27193 IX86_BUILTIN_CVTTSS2SI,
27194 IX86_BUILTIN_CVTTSS2SI64,
27196 IX86_BUILTIN_MAXPS,
27197 IX86_BUILTIN_MAXSS,
27198 IX86_BUILTIN_MINPS,
27199 IX86_BUILTIN_MINSS,
27201 IX86_BUILTIN_LOADUPS,
27202 IX86_BUILTIN_STOREUPS,
27203 IX86_BUILTIN_MOVSS,
27205 IX86_BUILTIN_MOVHLPS,
27206 IX86_BUILTIN_MOVLHPS,
27207 IX86_BUILTIN_LOADHPS,
27208 IX86_BUILTIN_LOADLPS,
27209 IX86_BUILTIN_STOREHPS,
27210 IX86_BUILTIN_STORELPS,
27212 IX86_BUILTIN_MASKMOVQ,
27213 IX86_BUILTIN_MOVMSKPS,
27214 IX86_BUILTIN_PMOVMSKB,
27216 IX86_BUILTIN_MOVNTPS,
27217 IX86_BUILTIN_MOVNTQ,
27219 IX86_BUILTIN_LOADDQU,
27220 IX86_BUILTIN_STOREDQU,
27222 IX86_BUILTIN_PACKSSWB,
27223 IX86_BUILTIN_PACKSSDW,
27224 IX86_BUILTIN_PACKUSWB,
27226 IX86_BUILTIN_PADDB,
27227 IX86_BUILTIN_PADDW,
27228 IX86_BUILTIN_PADDD,
27229 IX86_BUILTIN_PADDQ,
27230 IX86_BUILTIN_PADDSB,
27231 IX86_BUILTIN_PADDSW,
27232 IX86_BUILTIN_PADDUSB,
27233 IX86_BUILTIN_PADDUSW,
27234 IX86_BUILTIN_PSUBB,
27235 IX86_BUILTIN_PSUBW,
27236 IX86_BUILTIN_PSUBD,
27237 IX86_BUILTIN_PSUBQ,
27238 IX86_BUILTIN_PSUBSB,
27239 IX86_BUILTIN_PSUBSW,
27240 IX86_BUILTIN_PSUBUSB,
27241 IX86_BUILTIN_PSUBUSW,
27243 IX86_BUILTIN_PAND,
27244 IX86_BUILTIN_PANDN,
27245 IX86_BUILTIN_POR,
27246 IX86_BUILTIN_PXOR,
27248 IX86_BUILTIN_PAVGB,
27249 IX86_BUILTIN_PAVGW,
27251 IX86_BUILTIN_PCMPEQB,
27252 IX86_BUILTIN_PCMPEQW,
27253 IX86_BUILTIN_PCMPEQD,
27254 IX86_BUILTIN_PCMPGTB,
27255 IX86_BUILTIN_PCMPGTW,
27256 IX86_BUILTIN_PCMPGTD,
27258 IX86_BUILTIN_PMADDWD,
27260 IX86_BUILTIN_PMAXSW,
27261 IX86_BUILTIN_PMAXUB,
27262 IX86_BUILTIN_PMINSW,
27263 IX86_BUILTIN_PMINUB,
27265 IX86_BUILTIN_PMULHUW,
27266 IX86_BUILTIN_PMULHW,
27267 IX86_BUILTIN_PMULLW,
27269 IX86_BUILTIN_PSADBW,
27270 IX86_BUILTIN_PSHUFW,
27272 IX86_BUILTIN_PSLLW,
27273 IX86_BUILTIN_PSLLD,
27274 IX86_BUILTIN_PSLLQ,
27275 IX86_BUILTIN_PSRAW,
27276 IX86_BUILTIN_PSRAD,
27277 IX86_BUILTIN_PSRLW,
27278 IX86_BUILTIN_PSRLD,
27279 IX86_BUILTIN_PSRLQ,
27280 IX86_BUILTIN_PSLLWI,
27281 IX86_BUILTIN_PSLLDI,
27282 IX86_BUILTIN_PSLLQI,
27283 IX86_BUILTIN_PSRAWI,
27284 IX86_BUILTIN_PSRADI,
27285 IX86_BUILTIN_PSRLWI,
27286 IX86_BUILTIN_PSRLDI,
27287 IX86_BUILTIN_PSRLQI,
27289 IX86_BUILTIN_PUNPCKHBW,
27290 IX86_BUILTIN_PUNPCKHWD,
27291 IX86_BUILTIN_PUNPCKHDQ,
27292 IX86_BUILTIN_PUNPCKLBW,
27293 IX86_BUILTIN_PUNPCKLWD,
27294 IX86_BUILTIN_PUNPCKLDQ,
27296 IX86_BUILTIN_SHUFPS,
27298 IX86_BUILTIN_RCPPS,
27299 IX86_BUILTIN_RCPSS,
27300 IX86_BUILTIN_RSQRTPS,
27301 IX86_BUILTIN_RSQRTPS_NR,
27302 IX86_BUILTIN_RSQRTSS,
27303 IX86_BUILTIN_RSQRTF,
27304 IX86_BUILTIN_SQRTPS,
27305 IX86_BUILTIN_SQRTPS_NR,
27306 IX86_BUILTIN_SQRTSS,
27308 IX86_BUILTIN_UNPCKHPS,
27309 IX86_BUILTIN_UNPCKLPS,
27311 IX86_BUILTIN_ANDPS,
27312 IX86_BUILTIN_ANDNPS,
27313 IX86_BUILTIN_ORPS,
27314 IX86_BUILTIN_XORPS,
27316 IX86_BUILTIN_EMMS,
27317 IX86_BUILTIN_LDMXCSR,
27318 IX86_BUILTIN_STMXCSR,
27319 IX86_BUILTIN_SFENCE,
27321 IX86_BUILTIN_FXSAVE,
27322 IX86_BUILTIN_FXRSTOR,
27323 IX86_BUILTIN_FXSAVE64,
27324 IX86_BUILTIN_FXRSTOR64,
27326 IX86_BUILTIN_XSAVE,
27327 IX86_BUILTIN_XRSTOR,
27328 IX86_BUILTIN_XSAVE64,
27329 IX86_BUILTIN_XRSTOR64,
27331 IX86_BUILTIN_XSAVEOPT,
27332 IX86_BUILTIN_XSAVEOPT64,
27334 IX86_BUILTIN_XSAVEC,
27335 IX86_BUILTIN_XSAVEC64,
27337 IX86_BUILTIN_XSAVES,
27338 IX86_BUILTIN_XRSTORS,
27339 IX86_BUILTIN_XSAVES64,
27340 IX86_BUILTIN_XRSTORS64,
27342 /* 3DNow! Original */
27343 IX86_BUILTIN_FEMMS,
27344 IX86_BUILTIN_PAVGUSB,
27345 IX86_BUILTIN_PF2ID,
27346 IX86_BUILTIN_PFACC,
27347 IX86_BUILTIN_PFADD,
27348 IX86_BUILTIN_PFCMPEQ,
27349 IX86_BUILTIN_PFCMPGE,
27350 IX86_BUILTIN_PFCMPGT,
27351 IX86_BUILTIN_PFMAX,
27352 IX86_BUILTIN_PFMIN,
27353 IX86_BUILTIN_PFMUL,
27354 IX86_BUILTIN_PFRCP,
27355 IX86_BUILTIN_PFRCPIT1,
27356 IX86_BUILTIN_PFRCPIT2,
27357 IX86_BUILTIN_PFRSQIT1,
27358 IX86_BUILTIN_PFRSQRT,
27359 IX86_BUILTIN_PFSUB,
27360 IX86_BUILTIN_PFSUBR,
27361 IX86_BUILTIN_PI2FD,
27362 IX86_BUILTIN_PMULHRW,
27364 /* 3DNow! Athlon Extensions */
27365 IX86_BUILTIN_PF2IW,
27366 IX86_BUILTIN_PFNACC,
27367 IX86_BUILTIN_PFPNACC,
27368 IX86_BUILTIN_PI2FW,
27369 IX86_BUILTIN_PSWAPDSI,
27370 IX86_BUILTIN_PSWAPDSF,
27372 /* SSE2 */
27373 IX86_BUILTIN_ADDPD,
27374 IX86_BUILTIN_ADDSD,
27375 IX86_BUILTIN_DIVPD,
27376 IX86_BUILTIN_DIVSD,
27377 IX86_BUILTIN_MULPD,
27378 IX86_BUILTIN_MULSD,
27379 IX86_BUILTIN_SUBPD,
27380 IX86_BUILTIN_SUBSD,
27382 IX86_BUILTIN_CMPEQPD,
27383 IX86_BUILTIN_CMPLTPD,
27384 IX86_BUILTIN_CMPLEPD,
27385 IX86_BUILTIN_CMPGTPD,
27386 IX86_BUILTIN_CMPGEPD,
27387 IX86_BUILTIN_CMPNEQPD,
27388 IX86_BUILTIN_CMPNLTPD,
27389 IX86_BUILTIN_CMPNLEPD,
27390 IX86_BUILTIN_CMPNGTPD,
27391 IX86_BUILTIN_CMPNGEPD,
27392 IX86_BUILTIN_CMPORDPD,
27393 IX86_BUILTIN_CMPUNORDPD,
27394 IX86_BUILTIN_CMPEQSD,
27395 IX86_BUILTIN_CMPLTSD,
27396 IX86_BUILTIN_CMPLESD,
27397 IX86_BUILTIN_CMPNEQSD,
27398 IX86_BUILTIN_CMPNLTSD,
27399 IX86_BUILTIN_CMPNLESD,
27400 IX86_BUILTIN_CMPORDSD,
27401 IX86_BUILTIN_CMPUNORDSD,
27403 IX86_BUILTIN_COMIEQSD,
27404 IX86_BUILTIN_COMILTSD,
27405 IX86_BUILTIN_COMILESD,
27406 IX86_BUILTIN_COMIGTSD,
27407 IX86_BUILTIN_COMIGESD,
27408 IX86_BUILTIN_COMINEQSD,
27409 IX86_BUILTIN_UCOMIEQSD,
27410 IX86_BUILTIN_UCOMILTSD,
27411 IX86_BUILTIN_UCOMILESD,
27412 IX86_BUILTIN_UCOMIGTSD,
27413 IX86_BUILTIN_UCOMIGESD,
27414 IX86_BUILTIN_UCOMINEQSD,
27416 IX86_BUILTIN_MAXPD,
27417 IX86_BUILTIN_MAXSD,
27418 IX86_BUILTIN_MINPD,
27419 IX86_BUILTIN_MINSD,
27421 IX86_BUILTIN_ANDPD,
27422 IX86_BUILTIN_ANDNPD,
27423 IX86_BUILTIN_ORPD,
27424 IX86_BUILTIN_XORPD,
27426 IX86_BUILTIN_SQRTPD,
27427 IX86_BUILTIN_SQRTSD,
27429 IX86_BUILTIN_UNPCKHPD,
27430 IX86_BUILTIN_UNPCKLPD,
27432 IX86_BUILTIN_SHUFPD,
27434 IX86_BUILTIN_LOADUPD,
27435 IX86_BUILTIN_STOREUPD,
27436 IX86_BUILTIN_MOVSD,
27438 IX86_BUILTIN_LOADHPD,
27439 IX86_BUILTIN_LOADLPD,
27441 IX86_BUILTIN_CVTDQ2PD,
27442 IX86_BUILTIN_CVTDQ2PS,
27444 IX86_BUILTIN_CVTPD2DQ,
27445 IX86_BUILTIN_CVTPD2PI,
27446 IX86_BUILTIN_CVTPD2PS,
27447 IX86_BUILTIN_CVTTPD2DQ,
27448 IX86_BUILTIN_CVTTPD2PI,
27450 IX86_BUILTIN_CVTPI2PD,
27451 IX86_BUILTIN_CVTSI2SD,
27452 IX86_BUILTIN_CVTSI642SD,
27454 IX86_BUILTIN_CVTSD2SI,
27455 IX86_BUILTIN_CVTSD2SI64,
27456 IX86_BUILTIN_CVTSD2SS,
27457 IX86_BUILTIN_CVTSS2SD,
27458 IX86_BUILTIN_CVTTSD2SI,
27459 IX86_BUILTIN_CVTTSD2SI64,
27461 IX86_BUILTIN_CVTPS2DQ,
27462 IX86_BUILTIN_CVTPS2PD,
27463 IX86_BUILTIN_CVTTPS2DQ,
27465 IX86_BUILTIN_MOVNTI,
27466 IX86_BUILTIN_MOVNTI64,
27467 IX86_BUILTIN_MOVNTPD,
27468 IX86_BUILTIN_MOVNTDQ,
27470 IX86_BUILTIN_MOVQ128,
27472 /* SSE2 MMX */
27473 IX86_BUILTIN_MASKMOVDQU,
27474 IX86_BUILTIN_MOVMSKPD,
27475 IX86_BUILTIN_PMOVMSKB128,
27477 IX86_BUILTIN_PACKSSWB128,
27478 IX86_BUILTIN_PACKSSDW128,
27479 IX86_BUILTIN_PACKUSWB128,
27481 IX86_BUILTIN_PADDB128,
27482 IX86_BUILTIN_PADDW128,
27483 IX86_BUILTIN_PADDD128,
27484 IX86_BUILTIN_PADDQ128,
27485 IX86_BUILTIN_PADDSB128,
27486 IX86_BUILTIN_PADDSW128,
27487 IX86_BUILTIN_PADDUSB128,
27488 IX86_BUILTIN_PADDUSW128,
27489 IX86_BUILTIN_PSUBB128,
27490 IX86_BUILTIN_PSUBW128,
27491 IX86_BUILTIN_PSUBD128,
27492 IX86_BUILTIN_PSUBQ128,
27493 IX86_BUILTIN_PSUBSB128,
27494 IX86_BUILTIN_PSUBSW128,
27495 IX86_BUILTIN_PSUBUSB128,
27496 IX86_BUILTIN_PSUBUSW128,
27498 IX86_BUILTIN_PAND128,
27499 IX86_BUILTIN_PANDN128,
27500 IX86_BUILTIN_POR128,
27501 IX86_BUILTIN_PXOR128,
27503 IX86_BUILTIN_PAVGB128,
27504 IX86_BUILTIN_PAVGW128,
27506 IX86_BUILTIN_PCMPEQB128,
27507 IX86_BUILTIN_PCMPEQW128,
27508 IX86_BUILTIN_PCMPEQD128,
27509 IX86_BUILTIN_PCMPGTB128,
27510 IX86_BUILTIN_PCMPGTW128,
27511 IX86_BUILTIN_PCMPGTD128,
27513 IX86_BUILTIN_PMADDWD128,
27515 IX86_BUILTIN_PMAXSW128,
27516 IX86_BUILTIN_PMAXUB128,
27517 IX86_BUILTIN_PMINSW128,
27518 IX86_BUILTIN_PMINUB128,
27520 IX86_BUILTIN_PMULUDQ,
27521 IX86_BUILTIN_PMULUDQ128,
27522 IX86_BUILTIN_PMULHUW128,
27523 IX86_BUILTIN_PMULHW128,
27524 IX86_BUILTIN_PMULLW128,
27526 IX86_BUILTIN_PSADBW128,
27527 IX86_BUILTIN_PSHUFHW,
27528 IX86_BUILTIN_PSHUFLW,
27529 IX86_BUILTIN_PSHUFD,
27531 IX86_BUILTIN_PSLLDQI128,
27532 IX86_BUILTIN_PSLLWI128,
27533 IX86_BUILTIN_PSLLDI128,
27534 IX86_BUILTIN_PSLLQI128,
27535 IX86_BUILTIN_PSRAWI128,
27536 IX86_BUILTIN_PSRADI128,
27537 IX86_BUILTIN_PSRLDQI128,
27538 IX86_BUILTIN_PSRLWI128,
27539 IX86_BUILTIN_PSRLDI128,
27540 IX86_BUILTIN_PSRLQI128,
27542 IX86_BUILTIN_PSLLDQ128,
27543 IX86_BUILTIN_PSLLW128,
27544 IX86_BUILTIN_PSLLD128,
27545 IX86_BUILTIN_PSLLQ128,
27546 IX86_BUILTIN_PSRAW128,
27547 IX86_BUILTIN_PSRAD128,
27548 IX86_BUILTIN_PSRLW128,
27549 IX86_BUILTIN_PSRLD128,
27550 IX86_BUILTIN_PSRLQ128,
27552 IX86_BUILTIN_PUNPCKHBW128,
27553 IX86_BUILTIN_PUNPCKHWD128,
27554 IX86_BUILTIN_PUNPCKHDQ128,
27555 IX86_BUILTIN_PUNPCKHQDQ128,
27556 IX86_BUILTIN_PUNPCKLBW128,
27557 IX86_BUILTIN_PUNPCKLWD128,
27558 IX86_BUILTIN_PUNPCKLDQ128,
27559 IX86_BUILTIN_PUNPCKLQDQ128,
27561 IX86_BUILTIN_CLFLUSH,
27562 IX86_BUILTIN_MFENCE,
27563 IX86_BUILTIN_LFENCE,
27564 IX86_BUILTIN_PAUSE,
27566 IX86_BUILTIN_FNSTENV,
27567 IX86_BUILTIN_FLDENV,
27568 IX86_BUILTIN_FNSTSW,
27569 IX86_BUILTIN_FNCLEX,
27571 IX86_BUILTIN_BSRSI,
27572 IX86_BUILTIN_BSRDI,
27573 IX86_BUILTIN_RDPMC,
27574 IX86_BUILTIN_RDTSC,
27575 IX86_BUILTIN_RDTSCP,
27576 IX86_BUILTIN_ROLQI,
27577 IX86_BUILTIN_ROLHI,
27578 IX86_BUILTIN_RORQI,
27579 IX86_BUILTIN_RORHI,
27581 /* SSE3. */
27582 IX86_BUILTIN_ADDSUBPS,
27583 IX86_BUILTIN_HADDPS,
27584 IX86_BUILTIN_HSUBPS,
27585 IX86_BUILTIN_MOVSHDUP,
27586 IX86_BUILTIN_MOVSLDUP,
27587 IX86_BUILTIN_ADDSUBPD,
27588 IX86_BUILTIN_HADDPD,
27589 IX86_BUILTIN_HSUBPD,
27590 IX86_BUILTIN_LDDQU,
27592 IX86_BUILTIN_MONITOR,
27593 IX86_BUILTIN_MWAIT,
27595 /* SSSE3. */
27596 IX86_BUILTIN_PHADDW,
27597 IX86_BUILTIN_PHADDD,
27598 IX86_BUILTIN_PHADDSW,
27599 IX86_BUILTIN_PHSUBW,
27600 IX86_BUILTIN_PHSUBD,
27601 IX86_BUILTIN_PHSUBSW,
27602 IX86_BUILTIN_PMADDUBSW,
27603 IX86_BUILTIN_PMULHRSW,
27604 IX86_BUILTIN_PSHUFB,
27605 IX86_BUILTIN_PSIGNB,
27606 IX86_BUILTIN_PSIGNW,
27607 IX86_BUILTIN_PSIGND,
27608 IX86_BUILTIN_PALIGNR,
27609 IX86_BUILTIN_PABSB,
27610 IX86_BUILTIN_PABSW,
27611 IX86_BUILTIN_PABSD,
27613 IX86_BUILTIN_PHADDW128,
27614 IX86_BUILTIN_PHADDD128,
27615 IX86_BUILTIN_PHADDSW128,
27616 IX86_BUILTIN_PHSUBW128,
27617 IX86_BUILTIN_PHSUBD128,
27618 IX86_BUILTIN_PHSUBSW128,
27619 IX86_BUILTIN_PMADDUBSW128,
27620 IX86_BUILTIN_PMULHRSW128,
27621 IX86_BUILTIN_PSHUFB128,
27622 IX86_BUILTIN_PSIGNB128,
27623 IX86_BUILTIN_PSIGNW128,
27624 IX86_BUILTIN_PSIGND128,
27625 IX86_BUILTIN_PALIGNR128,
27626 IX86_BUILTIN_PABSB128,
27627 IX86_BUILTIN_PABSW128,
27628 IX86_BUILTIN_PABSD128,
27630 /* AMDFAM10 - SSE4A New Instructions. */
27631 IX86_BUILTIN_MOVNTSD,
27632 IX86_BUILTIN_MOVNTSS,
27633 IX86_BUILTIN_EXTRQI,
27634 IX86_BUILTIN_EXTRQ,
27635 IX86_BUILTIN_INSERTQI,
27636 IX86_BUILTIN_INSERTQ,
27638 /* SSE4.1. */
27639 IX86_BUILTIN_BLENDPD,
27640 IX86_BUILTIN_BLENDPS,
27641 IX86_BUILTIN_BLENDVPD,
27642 IX86_BUILTIN_BLENDVPS,
27643 IX86_BUILTIN_PBLENDVB128,
27644 IX86_BUILTIN_PBLENDW128,
27646 IX86_BUILTIN_DPPD,
27647 IX86_BUILTIN_DPPS,
27649 IX86_BUILTIN_INSERTPS128,
27651 IX86_BUILTIN_MOVNTDQA,
27652 IX86_BUILTIN_MPSADBW128,
27653 IX86_BUILTIN_PACKUSDW128,
27654 IX86_BUILTIN_PCMPEQQ,
27655 IX86_BUILTIN_PHMINPOSUW128,
27657 IX86_BUILTIN_PMAXSB128,
27658 IX86_BUILTIN_PMAXSD128,
27659 IX86_BUILTIN_PMAXUD128,
27660 IX86_BUILTIN_PMAXUW128,
27662 IX86_BUILTIN_PMINSB128,
27663 IX86_BUILTIN_PMINSD128,
27664 IX86_BUILTIN_PMINUD128,
27665 IX86_BUILTIN_PMINUW128,
27667 IX86_BUILTIN_PMOVSXBW128,
27668 IX86_BUILTIN_PMOVSXBD128,
27669 IX86_BUILTIN_PMOVSXBQ128,
27670 IX86_BUILTIN_PMOVSXWD128,
27671 IX86_BUILTIN_PMOVSXWQ128,
27672 IX86_BUILTIN_PMOVSXDQ128,
27674 IX86_BUILTIN_PMOVZXBW128,
27675 IX86_BUILTIN_PMOVZXBD128,
27676 IX86_BUILTIN_PMOVZXBQ128,
27677 IX86_BUILTIN_PMOVZXWD128,
27678 IX86_BUILTIN_PMOVZXWQ128,
27679 IX86_BUILTIN_PMOVZXDQ128,
27681 IX86_BUILTIN_PMULDQ128,
27682 IX86_BUILTIN_PMULLD128,
27684 IX86_BUILTIN_ROUNDSD,
27685 IX86_BUILTIN_ROUNDSS,
27687 IX86_BUILTIN_ROUNDPD,
27688 IX86_BUILTIN_ROUNDPS,
27690 IX86_BUILTIN_FLOORPD,
27691 IX86_BUILTIN_CEILPD,
27692 IX86_BUILTIN_TRUNCPD,
27693 IX86_BUILTIN_RINTPD,
27694 IX86_BUILTIN_ROUNDPD_AZ,
27696 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
27697 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
27698 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,
27700 IX86_BUILTIN_FLOORPS,
27701 IX86_BUILTIN_CEILPS,
27702 IX86_BUILTIN_TRUNCPS,
27703 IX86_BUILTIN_RINTPS,
27704 IX86_BUILTIN_ROUNDPS_AZ,
27706 IX86_BUILTIN_FLOORPS_SFIX,
27707 IX86_BUILTIN_CEILPS_SFIX,
27708 IX86_BUILTIN_ROUNDPS_AZ_SFIX,
27710 IX86_BUILTIN_PTESTZ,
27711 IX86_BUILTIN_PTESTC,
27712 IX86_BUILTIN_PTESTNZC,
27714 IX86_BUILTIN_VEC_INIT_V2SI,
27715 IX86_BUILTIN_VEC_INIT_V4HI,
27716 IX86_BUILTIN_VEC_INIT_V8QI,
27717 IX86_BUILTIN_VEC_EXT_V2DF,
27718 IX86_BUILTIN_VEC_EXT_V2DI,
27719 IX86_BUILTIN_VEC_EXT_V4SF,
27720 IX86_BUILTIN_VEC_EXT_V4SI,
27721 IX86_BUILTIN_VEC_EXT_V8HI,
27722 IX86_BUILTIN_VEC_EXT_V2SI,
27723 IX86_BUILTIN_VEC_EXT_V4HI,
27724 IX86_BUILTIN_VEC_EXT_V16QI,
27725 IX86_BUILTIN_VEC_SET_V2DI,
27726 IX86_BUILTIN_VEC_SET_V4SF,
27727 IX86_BUILTIN_VEC_SET_V4SI,
27728 IX86_BUILTIN_VEC_SET_V8HI,
27729 IX86_BUILTIN_VEC_SET_V4HI,
27730 IX86_BUILTIN_VEC_SET_V16QI,
27732 IX86_BUILTIN_VEC_PACK_SFIX,
27733 IX86_BUILTIN_VEC_PACK_SFIX256,
27735 /* SSE4.2. */
27736 IX86_BUILTIN_CRC32QI,
27737 IX86_BUILTIN_CRC32HI,
27738 IX86_BUILTIN_CRC32SI,
27739 IX86_BUILTIN_CRC32DI,
27741 IX86_BUILTIN_PCMPESTRI128,
27742 IX86_BUILTIN_PCMPESTRM128,
27743 IX86_BUILTIN_PCMPESTRA128,
27744 IX86_BUILTIN_PCMPESTRC128,
27745 IX86_BUILTIN_PCMPESTRO128,
27746 IX86_BUILTIN_PCMPESTRS128,
27747 IX86_BUILTIN_PCMPESTRZ128,
27748 IX86_BUILTIN_PCMPISTRI128,
27749 IX86_BUILTIN_PCMPISTRM128,
27750 IX86_BUILTIN_PCMPISTRA128,
27751 IX86_BUILTIN_PCMPISTRC128,
27752 IX86_BUILTIN_PCMPISTRO128,
27753 IX86_BUILTIN_PCMPISTRS128,
27754 IX86_BUILTIN_PCMPISTRZ128,
27756 IX86_BUILTIN_PCMPGTQ,
27758 /* AES instructions */
27759 IX86_BUILTIN_AESENC128,
27760 IX86_BUILTIN_AESENCLAST128,
27761 IX86_BUILTIN_AESDEC128,
27762 IX86_BUILTIN_AESDECLAST128,
27763 IX86_BUILTIN_AESIMC128,
27764 IX86_BUILTIN_AESKEYGENASSIST128,
27766 /* PCLMUL instruction */
27767 IX86_BUILTIN_PCLMULQDQ128,
27769 /* AVX */
27770 IX86_BUILTIN_ADDPD256,
27771 IX86_BUILTIN_ADDPS256,
27772 IX86_BUILTIN_ADDSUBPD256,
27773 IX86_BUILTIN_ADDSUBPS256,
27774 IX86_BUILTIN_ANDPD256,
27775 IX86_BUILTIN_ANDPS256,
27776 IX86_BUILTIN_ANDNPD256,
27777 IX86_BUILTIN_ANDNPS256,
27778 IX86_BUILTIN_BLENDPD256,
27779 IX86_BUILTIN_BLENDPS256,
27780 IX86_BUILTIN_BLENDVPD256,
27781 IX86_BUILTIN_BLENDVPS256,
27782 IX86_BUILTIN_DIVPD256,
27783 IX86_BUILTIN_DIVPS256,
27784 IX86_BUILTIN_DPPS256,
27785 IX86_BUILTIN_HADDPD256,
27786 IX86_BUILTIN_HADDPS256,
27787 IX86_BUILTIN_HSUBPD256,
27788 IX86_BUILTIN_HSUBPS256,
27789 IX86_BUILTIN_MAXPD256,
27790 IX86_BUILTIN_MAXPS256,
27791 IX86_BUILTIN_MINPD256,
27792 IX86_BUILTIN_MINPS256,
27793 IX86_BUILTIN_MULPD256,
27794 IX86_BUILTIN_MULPS256,
27795 IX86_BUILTIN_ORPD256,
27796 IX86_BUILTIN_ORPS256,
27797 IX86_BUILTIN_SHUFPD256,
27798 IX86_BUILTIN_SHUFPS256,
27799 IX86_BUILTIN_SUBPD256,
27800 IX86_BUILTIN_SUBPS256,
27801 IX86_BUILTIN_XORPD256,
27802 IX86_BUILTIN_XORPS256,
27803 IX86_BUILTIN_CMPSD,
27804 IX86_BUILTIN_CMPSS,
27805 IX86_BUILTIN_CMPPD,
27806 IX86_BUILTIN_CMPPS,
27807 IX86_BUILTIN_CMPPD256,
27808 IX86_BUILTIN_CMPPS256,
27809 IX86_BUILTIN_CVTDQ2PD256,
27810 IX86_BUILTIN_CVTDQ2PS256,
27811 IX86_BUILTIN_CVTPD2PS256,
27812 IX86_BUILTIN_CVTPS2DQ256,
27813 IX86_BUILTIN_CVTPS2PD256,
27814 IX86_BUILTIN_CVTTPD2DQ256,
27815 IX86_BUILTIN_CVTPD2DQ256,
27816 IX86_BUILTIN_CVTTPS2DQ256,
27817 IX86_BUILTIN_EXTRACTF128PD256,
27818 IX86_BUILTIN_EXTRACTF128PS256,
27819 IX86_BUILTIN_EXTRACTF128SI256,
27820 IX86_BUILTIN_VZEROALL,
27821 IX86_BUILTIN_VZEROUPPER,
27822 IX86_BUILTIN_VPERMILVARPD,
27823 IX86_BUILTIN_VPERMILVARPS,
27824 IX86_BUILTIN_VPERMILVARPD256,
27825 IX86_BUILTIN_VPERMILVARPS256,
27826 IX86_BUILTIN_VPERMILPD,
27827 IX86_BUILTIN_VPERMILPS,
27828 IX86_BUILTIN_VPERMILPD256,
27829 IX86_BUILTIN_VPERMILPS256,
27830 IX86_BUILTIN_VPERMIL2PD,
27831 IX86_BUILTIN_VPERMIL2PS,
27832 IX86_BUILTIN_VPERMIL2PD256,
27833 IX86_BUILTIN_VPERMIL2PS256,
27834 IX86_BUILTIN_VPERM2F128PD256,
27835 IX86_BUILTIN_VPERM2F128PS256,
27836 IX86_BUILTIN_VPERM2F128SI256,
27837 IX86_BUILTIN_VBROADCASTSS,
27838 IX86_BUILTIN_VBROADCASTSD256,
27839 IX86_BUILTIN_VBROADCASTSS256,
27840 IX86_BUILTIN_VBROADCASTPD256,
27841 IX86_BUILTIN_VBROADCASTPS256,
27842 IX86_BUILTIN_VINSERTF128PD256,
27843 IX86_BUILTIN_VINSERTF128PS256,
27844 IX86_BUILTIN_VINSERTF128SI256,
27845 IX86_BUILTIN_LOADUPD256,
27846 IX86_BUILTIN_LOADUPS256,
27847 IX86_BUILTIN_STOREUPD256,
27848 IX86_BUILTIN_STOREUPS256,
27849 IX86_BUILTIN_LDDQU256,
27850 IX86_BUILTIN_MOVNTDQ256,
27851 IX86_BUILTIN_MOVNTPD256,
27852 IX86_BUILTIN_MOVNTPS256,
27853 IX86_BUILTIN_LOADDQU256,
27854 IX86_BUILTIN_STOREDQU256,
27855 IX86_BUILTIN_MASKLOADPD,
27856 IX86_BUILTIN_MASKLOADPS,
27857 IX86_BUILTIN_MASKSTOREPD,
27858 IX86_BUILTIN_MASKSTOREPS,
27859 IX86_BUILTIN_MASKLOADPD256,
27860 IX86_BUILTIN_MASKLOADPS256,
27861 IX86_BUILTIN_MASKSTOREPD256,
27862 IX86_BUILTIN_MASKSTOREPS256,
27863 IX86_BUILTIN_MOVSHDUP256,
27864 IX86_BUILTIN_MOVSLDUP256,
27865 IX86_BUILTIN_MOVDDUP256,
27867 IX86_BUILTIN_SQRTPD256,
27868 IX86_BUILTIN_SQRTPS256,
27869 IX86_BUILTIN_SQRTPS_NR256,
27870 IX86_BUILTIN_RSQRTPS256,
27871 IX86_BUILTIN_RSQRTPS_NR256,
27873 IX86_BUILTIN_RCPPS256,
27875 IX86_BUILTIN_ROUNDPD256,
27876 IX86_BUILTIN_ROUNDPS256,
27878 IX86_BUILTIN_FLOORPD256,
27879 IX86_BUILTIN_CEILPD256,
27880 IX86_BUILTIN_TRUNCPD256,
27881 IX86_BUILTIN_RINTPD256,
27882 IX86_BUILTIN_ROUNDPD_AZ256,
27884 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
27885 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
27886 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,
27888 IX86_BUILTIN_FLOORPS256,
27889 IX86_BUILTIN_CEILPS256,
27890 IX86_BUILTIN_TRUNCPS256,
27891 IX86_BUILTIN_RINTPS256,
27892 IX86_BUILTIN_ROUNDPS_AZ256,
27894 IX86_BUILTIN_FLOORPS_SFIX256,
27895 IX86_BUILTIN_CEILPS_SFIX256,
27896 IX86_BUILTIN_ROUNDPS_AZ_SFIX256,
27898 IX86_BUILTIN_UNPCKHPD256,
27899 IX86_BUILTIN_UNPCKLPD256,
27900 IX86_BUILTIN_UNPCKHPS256,
27901 IX86_BUILTIN_UNPCKLPS256,
27903 IX86_BUILTIN_SI256_SI,
27904 IX86_BUILTIN_PS256_PS,
27905 IX86_BUILTIN_PD256_PD,
27906 IX86_BUILTIN_SI_SI256,
27907 IX86_BUILTIN_PS_PS256,
27908 IX86_BUILTIN_PD_PD256,
27910 IX86_BUILTIN_VTESTZPD,
27911 IX86_BUILTIN_VTESTCPD,
27912 IX86_BUILTIN_VTESTNZCPD,
27913 IX86_BUILTIN_VTESTZPS,
27914 IX86_BUILTIN_VTESTCPS,
27915 IX86_BUILTIN_VTESTNZCPS,
27916 IX86_BUILTIN_VTESTZPD256,
27917 IX86_BUILTIN_VTESTCPD256,
27918 IX86_BUILTIN_VTESTNZCPD256,
27919 IX86_BUILTIN_VTESTZPS256,
27920 IX86_BUILTIN_VTESTCPS256,
27921 IX86_BUILTIN_VTESTNZCPS256,
27922 IX86_BUILTIN_PTESTZ256,
27923 IX86_BUILTIN_PTESTC256,
27924 IX86_BUILTIN_PTESTNZC256,
27926 IX86_BUILTIN_MOVMSKPD256,
27927 IX86_BUILTIN_MOVMSKPS256,
27929 /* AVX2 */
27930 IX86_BUILTIN_MPSADBW256,
27931 IX86_BUILTIN_PABSB256,
27932 IX86_BUILTIN_PABSW256,
27933 IX86_BUILTIN_PABSD256,
27934 IX86_BUILTIN_PACKSSDW256,
27935 IX86_BUILTIN_PACKSSWB256,
27936 IX86_BUILTIN_PACKUSDW256,
27937 IX86_BUILTIN_PACKUSWB256,
27938 IX86_BUILTIN_PADDB256,
27939 IX86_BUILTIN_PADDW256,
27940 IX86_BUILTIN_PADDD256,
27941 IX86_BUILTIN_PADDQ256,
27942 IX86_BUILTIN_PADDSB256,
27943 IX86_BUILTIN_PADDSW256,
27944 IX86_BUILTIN_PADDUSB256,
27945 IX86_BUILTIN_PADDUSW256,
27946 IX86_BUILTIN_PALIGNR256,
27947 IX86_BUILTIN_AND256I,
27948 IX86_BUILTIN_ANDNOT256I,
27949 IX86_BUILTIN_PAVGB256,
27950 IX86_BUILTIN_PAVGW256,
27951 IX86_BUILTIN_PBLENDVB256,
27952 IX86_BUILTIN_PBLENDVW256,
27953 IX86_BUILTIN_PCMPEQB256,
27954 IX86_BUILTIN_PCMPEQW256,
27955 IX86_BUILTIN_PCMPEQD256,
27956 IX86_BUILTIN_PCMPEQQ256,
27957 IX86_BUILTIN_PCMPGTB256,
27958 IX86_BUILTIN_PCMPGTW256,
27959 IX86_BUILTIN_PCMPGTD256,
27960 IX86_BUILTIN_PCMPGTQ256,
27961 IX86_BUILTIN_PHADDW256,
27962 IX86_BUILTIN_PHADDD256,
27963 IX86_BUILTIN_PHADDSW256,
27964 IX86_BUILTIN_PHSUBW256,
27965 IX86_BUILTIN_PHSUBD256,
27966 IX86_BUILTIN_PHSUBSW256,
27967 IX86_BUILTIN_PMADDUBSW256,
27968 IX86_BUILTIN_PMADDWD256,
27969 IX86_BUILTIN_PMAXSB256,
27970 IX86_BUILTIN_PMAXSW256,
27971 IX86_BUILTIN_PMAXSD256,
27972 IX86_BUILTIN_PMAXUB256,
27973 IX86_BUILTIN_PMAXUW256,
27974 IX86_BUILTIN_PMAXUD256,
27975 IX86_BUILTIN_PMINSB256,
27976 IX86_BUILTIN_PMINSW256,
27977 IX86_BUILTIN_PMINSD256,
27978 IX86_BUILTIN_PMINUB256,
27979 IX86_BUILTIN_PMINUW256,
27980 IX86_BUILTIN_PMINUD256,
27981 IX86_BUILTIN_PMOVMSKB256,
27982 IX86_BUILTIN_PMOVSXBW256,
27983 IX86_BUILTIN_PMOVSXBD256,
27984 IX86_BUILTIN_PMOVSXBQ256,
27985 IX86_BUILTIN_PMOVSXWD256,
27986 IX86_BUILTIN_PMOVSXWQ256,
27987 IX86_BUILTIN_PMOVSXDQ256,
27988 IX86_BUILTIN_PMOVZXBW256,
27989 IX86_BUILTIN_PMOVZXBD256,
27990 IX86_BUILTIN_PMOVZXBQ256,
27991 IX86_BUILTIN_PMOVZXWD256,
27992 IX86_BUILTIN_PMOVZXWQ256,
27993 IX86_BUILTIN_PMOVZXDQ256,
27994 IX86_BUILTIN_PMULDQ256,
27995 IX86_BUILTIN_PMULHRSW256,
27996 IX86_BUILTIN_PMULHUW256,
27997 IX86_BUILTIN_PMULHW256,
27998 IX86_BUILTIN_PMULLW256,
27999 IX86_BUILTIN_PMULLD256,
28000 IX86_BUILTIN_PMULUDQ256,
28001 IX86_BUILTIN_POR256,
28002 IX86_BUILTIN_PSADBW256,
28003 IX86_BUILTIN_PSHUFB256,
28004 IX86_BUILTIN_PSHUFD256,
28005 IX86_BUILTIN_PSHUFHW256,
28006 IX86_BUILTIN_PSHUFLW256,
28007 IX86_BUILTIN_PSIGNB256,
28008 IX86_BUILTIN_PSIGNW256,
28009 IX86_BUILTIN_PSIGND256,
28010 IX86_BUILTIN_PSLLDQI256,
28011 IX86_BUILTIN_PSLLWI256,
28012 IX86_BUILTIN_PSLLW256,
28013 IX86_BUILTIN_PSLLDI256,
28014 IX86_BUILTIN_PSLLD256,
28015 IX86_BUILTIN_PSLLQI256,
28016 IX86_BUILTIN_PSLLQ256,
28017 IX86_BUILTIN_PSRAWI256,
28018 IX86_BUILTIN_PSRAW256,
28019 IX86_BUILTIN_PSRADI256,
28020 IX86_BUILTIN_PSRAD256,
28021 IX86_BUILTIN_PSRLDQI256,
28022 IX86_BUILTIN_PSRLWI256,
28023 IX86_BUILTIN_PSRLW256,
28024 IX86_BUILTIN_PSRLDI256,
28025 IX86_BUILTIN_PSRLD256,
28026 IX86_BUILTIN_PSRLQI256,
28027 IX86_BUILTIN_PSRLQ256,
28028 IX86_BUILTIN_PSUBB256,
28029 IX86_BUILTIN_PSUBW256,
28030 IX86_BUILTIN_PSUBD256,
28031 IX86_BUILTIN_PSUBQ256,
28032 IX86_BUILTIN_PSUBSB256,
28033 IX86_BUILTIN_PSUBSW256,
28034 IX86_BUILTIN_PSUBUSB256,
28035 IX86_BUILTIN_PSUBUSW256,
28036 IX86_BUILTIN_PUNPCKHBW256,
28037 IX86_BUILTIN_PUNPCKHWD256,
28038 IX86_BUILTIN_PUNPCKHDQ256,
28039 IX86_BUILTIN_PUNPCKHQDQ256,
28040 IX86_BUILTIN_PUNPCKLBW256,
28041 IX86_BUILTIN_PUNPCKLWD256,
28042 IX86_BUILTIN_PUNPCKLDQ256,
28043 IX86_BUILTIN_PUNPCKLQDQ256,
28044 IX86_BUILTIN_PXOR256,
28045 IX86_BUILTIN_MOVNTDQA256,
28046 IX86_BUILTIN_VBROADCASTSS_PS,
28047 IX86_BUILTIN_VBROADCASTSS_PS256,
28048 IX86_BUILTIN_VBROADCASTSD_PD256,
28049 IX86_BUILTIN_VBROADCASTSI256,
28050 IX86_BUILTIN_PBLENDD256,
28051 IX86_BUILTIN_PBLENDD128,
28052 IX86_BUILTIN_PBROADCASTB256,
28053 IX86_BUILTIN_PBROADCASTW256,
28054 IX86_BUILTIN_PBROADCASTD256,
28055 IX86_BUILTIN_PBROADCASTQ256,
28056 IX86_BUILTIN_PBROADCASTB128,
28057 IX86_BUILTIN_PBROADCASTW128,
28058 IX86_BUILTIN_PBROADCASTD128,
28059 IX86_BUILTIN_PBROADCASTQ128,
28060 IX86_BUILTIN_VPERMVARSI256,
28061 IX86_BUILTIN_VPERMDF256,
28062 IX86_BUILTIN_VPERMVARSF256,
28063 IX86_BUILTIN_VPERMDI256,
28064 IX86_BUILTIN_VPERMTI256,
28065 IX86_BUILTIN_VEXTRACT128I256,
28066 IX86_BUILTIN_VINSERT128I256,
28067 IX86_BUILTIN_MASKLOADD,
28068 IX86_BUILTIN_MASKLOADQ,
28069 IX86_BUILTIN_MASKLOADD256,
28070 IX86_BUILTIN_MASKLOADQ256,
28071 IX86_BUILTIN_MASKSTORED,
28072 IX86_BUILTIN_MASKSTOREQ,
28073 IX86_BUILTIN_MASKSTORED256,
28074 IX86_BUILTIN_MASKSTOREQ256,
28075 IX86_BUILTIN_PSLLVV4DI,
28076 IX86_BUILTIN_PSLLVV2DI,
28077 IX86_BUILTIN_PSLLVV8SI,
28078 IX86_BUILTIN_PSLLVV4SI,
28079 IX86_BUILTIN_PSRAVV8SI,
28080 IX86_BUILTIN_PSRAVV4SI,
28081 IX86_BUILTIN_PSRLVV4DI,
28082 IX86_BUILTIN_PSRLVV2DI,
28083 IX86_BUILTIN_PSRLVV8SI,
28084 IX86_BUILTIN_PSRLVV4SI,
28086 IX86_BUILTIN_GATHERSIV2DF,
28087 IX86_BUILTIN_GATHERSIV4DF,
28088 IX86_BUILTIN_GATHERDIV2DF,
28089 IX86_BUILTIN_GATHERDIV4DF,
28090 IX86_BUILTIN_GATHERSIV4SF,
28091 IX86_BUILTIN_GATHERSIV8SF,
28092 IX86_BUILTIN_GATHERDIV4SF,
28093 IX86_BUILTIN_GATHERDIV8SF,
28094 IX86_BUILTIN_GATHERSIV2DI,
28095 IX86_BUILTIN_GATHERSIV4DI,
28096 IX86_BUILTIN_GATHERDIV2DI,
28097 IX86_BUILTIN_GATHERDIV4DI,
28098 IX86_BUILTIN_GATHERSIV4SI,
28099 IX86_BUILTIN_GATHERSIV8SI,
28100 IX86_BUILTIN_GATHERDIV4SI,
28101 IX86_BUILTIN_GATHERDIV8SI,
28103 /* AVX512F */
28104 IX86_BUILTIN_ADDPD512,
28105 IX86_BUILTIN_ADDPS512,
28106 IX86_BUILTIN_ADDSD_ROUND,
28107 IX86_BUILTIN_ADDSS_ROUND,
28108 IX86_BUILTIN_ALIGND512,
28109 IX86_BUILTIN_ALIGNQ512,
28110 IX86_BUILTIN_BLENDMD512,
28111 IX86_BUILTIN_BLENDMPD512,
28112 IX86_BUILTIN_BLENDMPS512,
28113 IX86_BUILTIN_BLENDMQ512,
28114 IX86_BUILTIN_BROADCASTF32X4_512,
28115 IX86_BUILTIN_BROADCASTF64X4_512,
28116 IX86_BUILTIN_BROADCASTI32X4_512,
28117 IX86_BUILTIN_BROADCASTI64X4_512,
28118 IX86_BUILTIN_BROADCASTSD512,
28119 IX86_BUILTIN_BROADCASTSS512,
28120 IX86_BUILTIN_CMPD512,
28121 IX86_BUILTIN_CMPPD512,
28122 IX86_BUILTIN_CMPPS512,
28123 IX86_BUILTIN_CMPQ512,
28124 IX86_BUILTIN_CMPSD_MASK,
28125 IX86_BUILTIN_CMPSS_MASK,
28126 IX86_BUILTIN_COMIDF,
28127 IX86_BUILTIN_COMISF,
28128 IX86_BUILTIN_COMPRESSPD512,
28129 IX86_BUILTIN_COMPRESSPDSTORE512,
28130 IX86_BUILTIN_COMPRESSPS512,
28131 IX86_BUILTIN_COMPRESSPSSTORE512,
28132 IX86_BUILTIN_CVTDQ2PD512,
28133 IX86_BUILTIN_CVTDQ2PS512,
28134 IX86_BUILTIN_CVTPD2DQ512,
28135 IX86_BUILTIN_CVTPD2PS512,
28136 IX86_BUILTIN_CVTPD2UDQ512,
28137 IX86_BUILTIN_CVTPH2PS512,
28138 IX86_BUILTIN_CVTPS2DQ512,
28139 IX86_BUILTIN_CVTPS2PD512,
28140 IX86_BUILTIN_CVTPS2PH512,
28141 IX86_BUILTIN_CVTPS2UDQ512,
28142 IX86_BUILTIN_CVTSD2SS_ROUND,
28143 IX86_BUILTIN_CVTSI2SD64,
28144 IX86_BUILTIN_CVTSI2SS32,
28145 IX86_BUILTIN_CVTSI2SS64,
28146 IX86_BUILTIN_CVTSS2SD_ROUND,
28147 IX86_BUILTIN_CVTTPD2DQ512,
28148 IX86_BUILTIN_CVTTPD2UDQ512,
28149 IX86_BUILTIN_CVTTPS2DQ512,
28150 IX86_BUILTIN_CVTTPS2UDQ512,
28151 IX86_BUILTIN_CVTUDQ2PD512,
28152 IX86_BUILTIN_CVTUDQ2PS512,
28153 IX86_BUILTIN_CVTUSI2SD32,
28154 IX86_BUILTIN_CVTUSI2SD64,
28155 IX86_BUILTIN_CVTUSI2SS32,
28156 IX86_BUILTIN_CVTUSI2SS64,
28157 IX86_BUILTIN_DIVPD512,
28158 IX86_BUILTIN_DIVPS512,
28159 IX86_BUILTIN_DIVSD_ROUND,
28160 IX86_BUILTIN_DIVSS_ROUND,
28161 IX86_BUILTIN_EXPANDPD512,
28162 IX86_BUILTIN_EXPANDPD512Z,
28163 IX86_BUILTIN_EXPANDPDLOAD512,
28164 IX86_BUILTIN_EXPANDPDLOAD512Z,
28165 IX86_BUILTIN_EXPANDPS512,
28166 IX86_BUILTIN_EXPANDPS512Z,
28167 IX86_BUILTIN_EXPANDPSLOAD512,
28168 IX86_BUILTIN_EXPANDPSLOAD512Z,
28169 IX86_BUILTIN_EXTRACTF32X4,
28170 IX86_BUILTIN_EXTRACTF64X4,
28171 IX86_BUILTIN_EXTRACTI32X4,
28172 IX86_BUILTIN_EXTRACTI64X4,
28173 IX86_BUILTIN_FIXUPIMMPD512_MASK,
28174 IX86_BUILTIN_FIXUPIMMPD512_MASKZ,
28175 IX86_BUILTIN_FIXUPIMMPS512_MASK,
28176 IX86_BUILTIN_FIXUPIMMPS512_MASKZ,
28177 IX86_BUILTIN_FIXUPIMMSD128_MASK,
28178 IX86_BUILTIN_FIXUPIMMSD128_MASKZ,
28179 IX86_BUILTIN_FIXUPIMMSS128_MASK,
28180 IX86_BUILTIN_FIXUPIMMSS128_MASKZ,
28181 IX86_BUILTIN_GETEXPPD512,
28182 IX86_BUILTIN_GETEXPPS512,
28183 IX86_BUILTIN_GETEXPSD128,
28184 IX86_BUILTIN_GETEXPSS128,
28185 IX86_BUILTIN_GETMANTPD512,
28186 IX86_BUILTIN_GETMANTPS512,
28187 IX86_BUILTIN_GETMANTSD128,
28188 IX86_BUILTIN_GETMANTSS128,
28189 IX86_BUILTIN_INSERTF32X4,
28190 IX86_BUILTIN_INSERTF64X4,
28191 IX86_BUILTIN_INSERTI32X4,
28192 IX86_BUILTIN_INSERTI64X4,
28193 IX86_BUILTIN_LOADAPD512,
28194 IX86_BUILTIN_LOADAPS512,
28195 IX86_BUILTIN_LOADDQUDI512,
28196 IX86_BUILTIN_LOADDQUSI512,
28197 IX86_BUILTIN_LOADUPD512,
28198 IX86_BUILTIN_LOADUPS512,
28199 IX86_BUILTIN_MAXPD512,
28200 IX86_BUILTIN_MAXPS512,
28201 IX86_BUILTIN_MAXSD_ROUND,
28202 IX86_BUILTIN_MAXSS_ROUND,
28203 IX86_BUILTIN_MINPD512,
28204 IX86_BUILTIN_MINPS512,
28205 IX86_BUILTIN_MINSD_ROUND,
28206 IX86_BUILTIN_MINSS_ROUND,
28207 IX86_BUILTIN_MOVAPD512,
28208 IX86_BUILTIN_MOVAPS512,
28209 IX86_BUILTIN_MOVDDUP512,
28210 IX86_BUILTIN_MOVDQA32LOAD512,
28211 IX86_BUILTIN_MOVDQA32STORE512,
28212 IX86_BUILTIN_MOVDQA32_512,
28213 IX86_BUILTIN_MOVDQA64LOAD512,
28214 IX86_BUILTIN_MOVDQA64STORE512,
28215 IX86_BUILTIN_MOVDQA64_512,
28216 IX86_BUILTIN_MOVNTDQ512,
28217 IX86_BUILTIN_MOVNTDQA512,
28218 IX86_BUILTIN_MOVNTPD512,
28219 IX86_BUILTIN_MOVNTPS512,
28220 IX86_BUILTIN_MOVSHDUP512,
28221 IX86_BUILTIN_MOVSLDUP512,
28222 IX86_BUILTIN_MULPD512,
28223 IX86_BUILTIN_MULPS512,
28224 IX86_BUILTIN_MULSD_ROUND,
28225 IX86_BUILTIN_MULSS_ROUND,
28226 IX86_BUILTIN_PABSD512,
28227 IX86_BUILTIN_PABSQ512,
28228 IX86_BUILTIN_PADDD512,
28229 IX86_BUILTIN_PADDQ512,
28230 IX86_BUILTIN_PANDD512,
28231 IX86_BUILTIN_PANDND512,
28232 IX86_BUILTIN_PANDNQ512,
28233 IX86_BUILTIN_PANDQ512,
28234 IX86_BUILTIN_PBROADCASTD512,
28235 IX86_BUILTIN_PBROADCASTD512_GPR,
28236 IX86_BUILTIN_PBROADCASTMB512,
28237 IX86_BUILTIN_PBROADCASTMW512,
28238 IX86_BUILTIN_PBROADCASTQ512,
28239 IX86_BUILTIN_PBROADCASTQ512_GPR,
28240 IX86_BUILTIN_PBROADCASTQ512_MEM,
28241 IX86_BUILTIN_PCMPEQD512_MASK,
28242 IX86_BUILTIN_PCMPEQQ512_MASK,
28243 IX86_BUILTIN_PCMPGTD512_MASK,
28244 IX86_BUILTIN_PCMPGTQ512_MASK,
28245 IX86_BUILTIN_PCOMPRESSD512,
28246 IX86_BUILTIN_PCOMPRESSDSTORE512,
28247 IX86_BUILTIN_PCOMPRESSQ512,
28248 IX86_BUILTIN_PCOMPRESSQSTORE512,
28249 IX86_BUILTIN_PEXPANDD512,
28250 IX86_BUILTIN_PEXPANDD512Z,
28251 IX86_BUILTIN_PEXPANDDLOAD512,
28252 IX86_BUILTIN_PEXPANDDLOAD512Z,
28253 IX86_BUILTIN_PEXPANDQ512,
28254 IX86_BUILTIN_PEXPANDQ512Z,
28255 IX86_BUILTIN_PEXPANDQLOAD512,
28256 IX86_BUILTIN_PEXPANDQLOAD512Z,
28257 IX86_BUILTIN_PMAXSD512,
28258 IX86_BUILTIN_PMAXSQ512,
28259 IX86_BUILTIN_PMAXUD512,
28260 IX86_BUILTIN_PMAXUQ512,
28261 IX86_BUILTIN_PMINSD512,
28262 IX86_BUILTIN_PMINSQ512,
28263 IX86_BUILTIN_PMINUD512,
28264 IX86_BUILTIN_PMINUQ512,
28265 IX86_BUILTIN_PMOVDB512,
28266 IX86_BUILTIN_PMOVDB512_MEM,
28267 IX86_BUILTIN_PMOVDW512,
28268 IX86_BUILTIN_PMOVDW512_MEM,
28269 IX86_BUILTIN_PMOVQB512,
28270 IX86_BUILTIN_PMOVQB512_MEM,
28271 IX86_BUILTIN_PMOVQD512,
28272 IX86_BUILTIN_PMOVQD512_MEM,
28273 IX86_BUILTIN_PMOVQW512,
28274 IX86_BUILTIN_PMOVQW512_MEM,
28275 IX86_BUILTIN_PMOVSDB512,
28276 IX86_BUILTIN_PMOVSDB512_MEM,
28277 IX86_BUILTIN_PMOVSDW512,
28278 IX86_BUILTIN_PMOVSDW512_MEM,
28279 IX86_BUILTIN_PMOVSQB512,
28280 IX86_BUILTIN_PMOVSQB512_MEM,
28281 IX86_BUILTIN_PMOVSQD512,
28282 IX86_BUILTIN_PMOVSQD512_MEM,
28283 IX86_BUILTIN_PMOVSQW512,
28284 IX86_BUILTIN_PMOVSQW512_MEM,
28285 IX86_BUILTIN_PMOVSXBD512,
28286 IX86_BUILTIN_PMOVSXBQ512,
28287 IX86_BUILTIN_PMOVSXDQ512,
28288 IX86_BUILTIN_PMOVSXWD512,
28289 IX86_BUILTIN_PMOVSXWQ512,
28290 IX86_BUILTIN_PMOVUSDB512,
28291 IX86_BUILTIN_PMOVUSDB512_MEM,
28292 IX86_BUILTIN_PMOVUSDW512,
28293 IX86_BUILTIN_PMOVUSDW512_MEM,
28294 IX86_BUILTIN_PMOVUSQB512,
28295 IX86_BUILTIN_PMOVUSQB512_MEM,
28296 IX86_BUILTIN_PMOVUSQD512,
28297 IX86_BUILTIN_PMOVUSQD512_MEM,
28298 IX86_BUILTIN_PMOVUSQW512,
28299 IX86_BUILTIN_PMOVUSQW512_MEM,
28300 IX86_BUILTIN_PMOVZXBD512,
28301 IX86_BUILTIN_PMOVZXBQ512,
28302 IX86_BUILTIN_PMOVZXDQ512,
28303 IX86_BUILTIN_PMOVZXWD512,
28304 IX86_BUILTIN_PMOVZXWQ512,
28305 IX86_BUILTIN_PMULDQ512,
28306 IX86_BUILTIN_PMULLD512,
28307 IX86_BUILTIN_PMULUDQ512,
28308 IX86_BUILTIN_PORD512,
28309 IX86_BUILTIN_PORQ512,
28310 IX86_BUILTIN_PROLD512,
28311 IX86_BUILTIN_PROLQ512,
28312 IX86_BUILTIN_PROLVD512,
28313 IX86_BUILTIN_PROLVQ512,
28314 IX86_BUILTIN_PRORD512,
28315 IX86_BUILTIN_PRORQ512,
28316 IX86_BUILTIN_PRORVD512,
28317 IX86_BUILTIN_PRORVQ512,
28318 IX86_BUILTIN_PSHUFD512,
28319 IX86_BUILTIN_PSLLD512,
28320 IX86_BUILTIN_PSLLDI512,
28321 IX86_BUILTIN_PSLLQ512,
28322 IX86_BUILTIN_PSLLQI512,
28323 IX86_BUILTIN_PSLLVV16SI,
28324 IX86_BUILTIN_PSLLVV8DI,
28325 IX86_BUILTIN_PSRAD512,
28326 IX86_BUILTIN_PSRADI512,
28327 IX86_BUILTIN_PSRAQ512,
28328 IX86_BUILTIN_PSRAQI512,
28329 IX86_BUILTIN_PSRAVV16SI,
28330 IX86_BUILTIN_PSRAVV8DI,
28331 IX86_BUILTIN_PSRLD512,
28332 IX86_BUILTIN_PSRLDI512,
28333 IX86_BUILTIN_PSRLQ512,
28334 IX86_BUILTIN_PSRLQI512,
28335 IX86_BUILTIN_PSRLVV16SI,
28336 IX86_BUILTIN_PSRLVV8DI,
28337 IX86_BUILTIN_PSUBD512,
28338 IX86_BUILTIN_PSUBQ512,
28339 IX86_BUILTIN_PTESTMD512,
28340 IX86_BUILTIN_PTESTMQ512,
28341 IX86_BUILTIN_PTESTNMD512,
28342 IX86_BUILTIN_PTESTNMQ512,
28343 IX86_BUILTIN_PUNPCKHDQ512,
28344 IX86_BUILTIN_PUNPCKHQDQ512,
28345 IX86_BUILTIN_PUNPCKLDQ512,
28346 IX86_BUILTIN_PUNPCKLQDQ512,
28347 IX86_BUILTIN_PXORD512,
28348 IX86_BUILTIN_PXORQ512,
28349 IX86_BUILTIN_RCP14PD512,
28350 IX86_BUILTIN_RCP14PS512,
28351 IX86_BUILTIN_RCP14SD,
28352 IX86_BUILTIN_RCP14SS,
28353 IX86_BUILTIN_RNDSCALEPD,
28354 IX86_BUILTIN_RNDSCALEPS,
28355 IX86_BUILTIN_RNDSCALESD,
28356 IX86_BUILTIN_RNDSCALESS,
28357 IX86_BUILTIN_RSQRT14PD512,
28358 IX86_BUILTIN_RSQRT14PS512,
28359 IX86_BUILTIN_RSQRT14SD,
28360 IX86_BUILTIN_RSQRT14SS,
28361 IX86_BUILTIN_SCALEFPD512,
28362 IX86_BUILTIN_SCALEFPS512,
28363 IX86_BUILTIN_SCALEFSD,
28364 IX86_BUILTIN_SCALEFSS,
28365 IX86_BUILTIN_SHUFPD512,
28366 IX86_BUILTIN_SHUFPS512,
28367 IX86_BUILTIN_SHUF_F32x4,
28368 IX86_BUILTIN_SHUF_F64x2,
28369 IX86_BUILTIN_SHUF_I32x4,
28370 IX86_BUILTIN_SHUF_I64x2,
28371 IX86_BUILTIN_SQRTPD512,
28372 IX86_BUILTIN_SQRTPD512_MASK,
28373 IX86_BUILTIN_SQRTPS512_MASK,
28374 IX86_BUILTIN_SQRTPS_NR512,
28375 IX86_BUILTIN_SQRTSD_ROUND,
28376 IX86_BUILTIN_SQRTSS_ROUND,
28377 IX86_BUILTIN_STOREAPD512,
28378 IX86_BUILTIN_STOREAPS512,
28379 IX86_BUILTIN_STOREDQUDI512,
28380 IX86_BUILTIN_STOREDQUSI512,
28381 IX86_BUILTIN_STOREUPD512,
28382 IX86_BUILTIN_STOREUPS512,
28383 IX86_BUILTIN_SUBPD512,
28384 IX86_BUILTIN_SUBPS512,
28385 IX86_BUILTIN_SUBSD_ROUND,
28386 IX86_BUILTIN_SUBSS_ROUND,
28387 IX86_BUILTIN_UCMPD512,
28388 IX86_BUILTIN_UCMPQ512,
28389 IX86_BUILTIN_UNPCKHPD512,
28390 IX86_BUILTIN_UNPCKHPS512,
28391 IX86_BUILTIN_UNPCKLPD512,
28392 IX86_BUILTIN_UNPCKLPS512,
28393 IX86_BUILTIN_VCVTSD2SI32,
28394 IX86_BUILTIN_VCVTSD2SI64,
28395 IX86_BUILTIN_VCVTSD2USI32,
28396 IX86_BUILTIN_VCVTSD2USI64,
28397 IX86_BUILTIN_VCVTSS2SI32,
28398 IX86_BUILTIN_VCVTSS2SI64,
28399 IX86_BUILTIN_VCVTSS2USI32,
28400 IX86_BUILTIN_VCVTSS2USI64,
28401 IX86_BUILTIN_VCVTTSD2SI32,
28402 IX86_BUILTIN_VCVTTSD2SI64,
28403 IX86_BUILTIN_VCVTTSD2USI32,
28404 IX86_BUILTIN_VCVTTSD2USI64,
28405 IX86_BUILTIN_VCVTTSS2SI32,
28406 IX86_BUILTIN_VCVTTSS2SI64,
28407 IX86_BUILTIN_VCVTTSS2USI32,
28408 IX86_BUILTIN_VCVTTSS2USI64,
28409 IX86_BUILTIN_VFMADDPD512_MASK,
28410 IX86_BUILTIN_VFMADDPD512_MASK3,
28411 IX86_BUILTIN_VFMADDPD512_MASKZ,
28412 IX86_BUILTIN_VFMADDPS512_MASK,
28413 IX86_BUILTIN_VFMADDPS512_MASK3,
28414 IX86_BUILTIN_VFMADDPS512_MASKZ,
28415 IX86_BUILTIN_VFMADDSD3_ROUND,
28416 IX86_BUILTIN_VFMADDSS3_ROUND,
28417 IX86_BUILTIN_VFMADDSUBPD512_MASK,
28418 IX86_BUILTIN_VFMADDSUBPD512_MASK3,
28419 IX86_BUILTIN_VFMADDSUBPD512_MASKZ,
28420 IX86_BUILTIN_VFMADDSUBPS512_MASK,
28421 IX86_BUILTIN_VFMADDSUBPS512_MASK3,
28422 IX86_BUILTIN_VFMADDSUBPS512_MASKZ,
28423 IX86_BUILTIN_VFMSUBADDPD512_MASK3,
28424 IX86_BUILTIN_VFMSUBADDPS512_MASK3,
28425 IX86_BUILTIN_VFMSUBPD512_MASK3,
28426 IX86_BUILTIN_VFMSUBPS512_MASK3,
28427 IX86_BUILTIN_VFMSUBSD3_MASK3,
28428 IX86_BUILTIN_VFMSUBSS3_MASK3,
28429 IX86_BUILTIN_VFNMADDPD512_MASK,
28430 IX86_BUILTIN_VFNMADDPS512_MASK,
28431 IX86_BUILTIN_VFNMSUBPD512_MASK,
28432 IX86_BUILTIN_VFNMSUBPD512_MASK3,
28433 IX86_BUILTIN_VFNMSUBPS512_MASK,
28434 IX86_BUILTIN_VFNMSUBPS512_MASK3,
28435 IX86_BUILTIN_VPCLZCNTD512,
28436 IX86_BUILTIN_VPCLZCNTQ512,
28437 IX86_BUILTIN_VPCONFLICTD512,
28438 IX86_BUILTIN_VPCONFLICTQ512,
28439 IX86_BUILTIN_VPERMDF512,
28440 IX86_BUILTIN_VPERMDI512,
28441 IX86_BUILTIN_VPERMI2VARD512,
28442 IX86_BUILTIN_VPERMI2VARPD512,
28443 IX86_BUILTIN_VPERMI2VARPS512,
28444 IX86_BUILTIN_VPERMI2VARQ512,
28445 IX86_BUILTIN_VPERMILPD512,
28446 IX86_BUILTIN_VPERMILPS512,
28447 IX86_BUILTIN_VPERMILVARPD512,
28448 IX86_BUILTIN_VPERMILVARPS512,
28449 IX86_BUILTIN_VPERMT2VARD512,
28450 IX86_BUILTIN_VPERMT2VARD512_MASKZ,
28451 IX86_BUILTIN_VPERMT2VARPD512,
28452 IX86_BUILTIN_VPERMT2VARPD512_MASKZ,
28453 IX86_BUILTIN_VPERMT2VARPS512,
28454 IX86_BUILTIN_VPERMT2VARPS512_MASKZ,
28455 IX86_BUILTIN_VPERMT2VARQ512,
28456 IX86_BUILTIN_VPERMT2VARQ512_MASKZ,
28457 IX86_BUILTIN_VPERMVARDF512,
28458 IX86_BUILTIN_VPERMVARDI512,
28459 IX86_BUILTIN_VPERMVARSF512,
28460 IX86_BUILTIN_VPERMVARSI512,
28461 IX86_BUILTIN_VTERNLOGD512_MASK,
28462 IX86_BUILTIN_VTERNLOGD512_MASKZ,
28463 IX86_BUILTIN_VTERNLOGQ512_MASK,
28464 IX86_BUILTIN_VTERNLOGQ512_MASKZ,
28466 /* Mask arithmetic operations */
28467 IX86_BUILTIN_KAND16,
28468 IX86_BUILTIN_KANDN16,
28469 IX86_BUILTIN_KNOT16,
28470 IX86_BUILTIN_KOR16,
28471 IX86_BUILTIN_KORTESTC16,
28472 IX86_BUILTIN_KORTESTZ16,
28473 IX86_BUILTIN_KUNPCKBW,
28474 IX86_BUILTIN_KXNOR16,
28475 IX86_BUILTIN_KXOR16,
28476 IX86_BUILTIN_KMOV16,
28478 /* Alternate 4 and 8 element gather/scatter for the vectorizer
28479 where all operands are 32-byte or 64-byte wide respectively. */
28480 IX86_BUILTIN_GATHERALTSIV4DF,
28481 IX86_BUILTIN_GATHERALTDIV8SF,
28482 IX86_BUILTIN_GATHERALTSIV4DI,
28483 IX86_BUILTIN_GATHERALTDIV8SI,
28484 IX86_BUILTIN_GATHER3ALTDIV16SF,
28485 IX86_BUILTIN_GATHER3ALTDIV16SI,
28486 IX86_BUILTIN_GATHER3ALTSIV8DF,
28487 IX86_BUILTIN_GATHER3ALTSIV8DI,
28488 IX86_BUILTIN_GATHER3DIV16SF,
28489 IX86_BUILTIN_GATHER3DIV16SI,
28490 IX86_BUILTIN_GATHER3DIV8DF,
28491 IX86_BUILTIN_GATHER3DIV8DI,
28492 IX86_BUILTIN_GATHER3SIV16SF,
28493 IX86_BUILTIN_GATHER3SIV16SI,
28494 IX86_BUILTIN_GATHER3SIV8DF,
28495 IX86_BUILTIN_GATHER3SIV8DI,
28496 IX86_BUILTIN_SCATTERDIV16SF,
28497 IX86_BUILTIN_SCATTERDIV16SI,
28498 IX86_BUILTIN_SCATTERDIV8DF,
28499 IX86_BUILTIN_SCATTERDIV8DI,
28500 IX86_BUILTIN_SCATTERSIV16SF,
28501 IX86_BUILTIN_SCATTERSIV16SI,
28502 IX86_BUILTIN_SCATTERSIV8DF,
28503 IX86_BUILTIN_SCATTERSIV8DI,
28505 /* AVX512PF */
28506 IX86_BUILTIN_GATHERPFQPD,
28507 IX86_BUILTIN_GATHERPFDPS,
28508 IX86_BUILTIN_GATHERPFDPD,
28509 IX86_BUILTIN_GATHERPFQPS,
28510 IX86_BUILTIN_SCATTERPFDPD,
28511 IX86_BUILTIN_SCATTERPFDPS,
28512 IX86_BUILTIN_SCATTERPFQPD,
28513 IX86_BUILTIN_SCATTERPFQPS,
28515 /* AVX-512ER */
28516 IX86_BUILTIN_EXP2PD_MASK,
28517 IX86_BUILTIN_EXP2PS_MASK,
28518 IX86_BUILTIN_EXP2PS,
28519 IX86_BUILTIN_RCP28PD,
28520 IX86_BUILTIN_RCP28PS,
28521 IX86_BUILTIN_RCP28SD,
28522 IX86_BUILTIN_RCP28SS,
28523 IX86_BUILTIN_RSQRT28PD,
28524 IX86_BUILTIN_RSQRT28PS,
28525 IX86_BUILTIN_RSQRT28SD,
28526 IX86_BUILTIN_RSQRT28SS,
28528 /* SHA builtins. */
28529 IX86_BUILTIN_SHA1MSG1,
28530 IX86_BUILTIN_SHA1MSG2,
28531 IX86_BUILTIN_SHA1NEXTE,
28532 IX86_BUILTIN_SHA1RNDS4,
28533 IX86_BUILTIN_SHA256MSG1,
28534 IX86_BUILTIN_SHA256MSG2,
28535 IX86_BUILTIN_SHA256RNDS2,
28537 /* CLFLUSHOPT instructions. */
28538 IX86_BUILTIN_CLFLUSHOPT,
28540 /* TFmode support builtins. */
28541 IX86_BUILTIN_INFQ,
28542 IX86_BUILTIN_HUGE_VALQ,
28543 IX86_BUILTIN_FABSQ,
28544 IX86_BUILTIN_COPYSIGNQ,
28546 /* Vectorizer support builtins. */
28547 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512,
28548 IX86_BUILTIN_CPYSGNPS,
28549 IX86_BUILTIN_CPYSGNPD,
28550 IX86_BUILTIN_CPYSGNPS256,
28551 IX86_BUILTIN_CPYSGNPS512,
28552 IX86_BUILTIN_CPYSGNPD256,
28553 IX86_BUILTIN_CPYSGNPD512,
28554 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512,
28555 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512,
28558 /* FMA4 instructions. */
28559 IX86_BUILTIN_VFMADDSS,
28560 IX86_BUILTIN_VFMADDSD,
28561 IX86_BUILTIN_VFMADDPS,
28562 IX86_BUILTIN_VFMADDPD,
28563 IX86_BUILTIN_VFMADDPS256,
28564 IX86_BUILTIN_VFMADDPD256,
28565 IX86_BUILTIN_VFMADDSUBPS,
28566 IX86_BUILTIN_VFMADDSUBPD,
28567 IX86_BUILTIN_VFMADDSUBPS256,
28568 IX86_BUILTIN_VFMADDSUBPD256,
28570 /* FMA3 instructions. */
28571 IX86_BUILTIN_VFMADDSS3,
28572 IX86_BUILTIN_VFMADDSD3,
28574 /* XOP instructions. */
28575 IX86_BUILTIN_VPCMOV,
28576 IX86_BUILTIN_VPCMOV_V2DI,
28577 IX86_BUILTIN_VPCMOV_V4SI,
28578 IX86_BUILTIN_VPCMOV_V8HI,
28579 IX86_BUILTIN_VPCMOV_V16QI,
28580 IX86_BUILTIN_VPCMOV_V4SF,
28581 IX86_BUILTIN_VPCMOV_V2DF,
28582 IX86_BUILTIN_VPCMOV256,
28583 IX86_BUILTIN_VPCMOV_V4DI256,
28584 IX86_BUILTIN_VPCMOV_V8SI256,
28585 IX86_BUILTIN_VPCMOV_V16HI256,
28586 IX86_BUILTIN_VPCMOV_V32QI256,
28587 IX86_BUILTIN_VPCMOV_V8SF256,
28588 IX86_BUILTIN_VPCMOV_V4DF256,
28590 IX86_BUILTIN_VPPERM,
28592 IX86_BUILTIN_VPMACSSWW,
28593 IX86_BUILTIN_VPMACSWW,
28594 IX86_BUILTIN_VPMACSSWD,
28595 IX86_BUILTIN_VPMACSWD,
28596 IX86_BUILTIN_VPMACSSDD,
28597 IX86_BUILTIN_VPMACSDD,
28598 IX86_BUILTIN_VPMACSSDQL,
28599 IX86_BUILTIN_VPMACSSDQH,
28600 IX86_BUILTIN_VPMACSDQL,
28601 IX86_BUILTIN_VPMACSDQH,
28602 IX86_BUILTIN_VPMADCSSWD,
28603 IX86_BUILTIN_VPMADCSWD,
28605 IX86_BUILTIN_VPHADDBW,
28606 IX86_BUILTIN_VPHADDBD,
28607 IX86_BUILTIN_VPHADDBQ,
28608 IX86_BUILTIN_VPHADDWD,
28609 IX86_BUILTIN_VPHADDWQ,
28610 IX86_BUILTIN_VPHADDDQ,
28611 IX86_BUILTIN_VPHADDUBW,
28612 IX86_BUILTIN_VPHADDUBD,
28613 IX86_BUILTIN_VPHADDUBQ,
28614 IX86_BUILTIN_VPHADDUWD,
28615 IX86_BUILTIN_VPHADDUWQ,
28616 IX86_BUILTIN_VPHADDUDQ,
28617 IX86_BUILTIN_VPHSUBBW,
28618 IX86_BUILTIN_VPHSUBWD,
28619 IX86_BUILTIN_VPHSUBDQ,
28621 IX86_BUILTIN_VPROTB,
28622 IX86_BUILTIN_VPROTW,
28623 IX86_BUILTIN_VPROTD,
28624 IX86_BUILTIN_VPROTQ,
28625 IX86_BUILTIN_VPROTB_IMM,
28626 IX86_BUILTIN_VPROTW_IMM,
28627 IX86_BUILTIN_VPROTD_IMM,
28628 IX86_BUILTIN_VPROTQ_IMM,
28630 IX86_BUILTIN_VPSHLB,
28631 IX86_BUILTIN_VPSHLW,
28632 IX86_BUILTIN_VPSHLD,
28633 IX86_BUILTIN_VPSHLQ,
28634 IX86_BUILTIN_VPSHAB,
28635 IX86_BUILTIN_VPSHAW,
28636 IX86_BUILTIN_VPSHAD,
28637 IX86_BUILTIN_VPSHAQ,
28639 IX86_BUILTIN_VFRCZSS,
28640 IX86_BUILTIN_VFRCZSD,
28641 IX86_BUILTIN_VFRCZPS,
28642 IX86_BUILTIN_VFRCZPD,
28643 IX86_BUILTIN_VFRCZPS256,
28644 IX86_BUILTIN_VFRCZPD256,
28646 IX86_BUILTIN_VPCOMEQUB,
28647 IX86_BUILTIN_VPCOMNEUB,
28648 IX86_BUILTIN_VPCOMLTUB,
28649 IX86_BUILTIN_VPCOMLEUB,
28650 IX86_BUILTIN_VPCOMGTUB,
28651 IX86_BUILTIN_VPCOMGEUB,
28652 IX86_BUILTIN_VPCOMFALSEUB,
28653 IX86_BUILTIN_VPCOMTRUEUB,
28655 IX86_BUILTIN_VPCOMEQUW,
28656 IX86_BUILTIN_VPCOMNEUW,
28657 IX86_BUILTIN_VPCOMLTUW,
28658 IX86_BUILTIN_VPCOMLEUW,
28659 IX86_BUILTIN_VPCOMGTUW,
28660 IX86_BUILTIN_VPCOMGEUW,
28661 IX86_BUILTIN_VPCOMFALSEUW,
28662 IX86_BUILTIN_VPCOMTRUEUW,
28664 IX86_BUILTIN_VPCOMEQUD,
28665 IX86_BUILTIN_VPCOMNEUD,
28666 IX86_BUILTIN_VPCOMLTUD,
28667 IX86_BUILTIN_VPCOMLEUD,
28668 IX86_BUILTIN_VPCOMGTUD,
28669 IX86_BUILTIN_VPCOMGEUD,
28670 IX86_BUILTIN_VPCOMFALSEUD,
28671 IX86_BUILTIN_VPCOMTRUEUD,
28673 IX86_BUILTIN_VPCOMEQUQ,
28674 IX86_BUILTIN_VPCOMNEUQ,
28675 IX86_BUILTIN_VPCOMLTUQ,
28676 IX86_BUILTIN_VPCOMLEUQ,
28677 IX86_BUILTIN_VPCOMGTUQ,
28678 IX86_BUILTIN_VPCOMGEUQ,
28679 IX86_BUILTIN_VPCOMFALSEUQ,
28680 IX86_BUILTIN_VPCOMTRUEUQ,
28682 IX86_BUILTIN_VPCOMEQB,
28683 IX86_BUILTIN_VPCOMNEB,
28684 IX86_BUILTIN_VPCOMLTB,
28685 IX86_BUILTIN_VPCOMLEB,
28686 IX86_BUILTIN_VPCOMGTB,
28687 IX86_BUILTIN_VPCOMGEB,
28688 IX86_BUILTIN_VPCOMFALSEB,
28689 IX86_BUILTIN_VPCOMTRUEB,
28691 IX86_BUILTIN_VPCOMEQW,
28692 IX86_BUILTIN_VPCOMNEW,
28693 IX86_BUILTIN_VPCOMLTW,
28694 IX86_BUILTIN_VPCOMLEW,
28695 IX86_BUILTIN_VPCOMGTW,
28696 IX86_BUILTIN_VPCOMGEW,
28697 IX86_BUILTIN_VPCOMFALSEW,
28698 IX86_BUILTIN_VPCOMTRUEW,
28700 IX86_BUILTIN_VPCOMEQD,
28701 IX86_BUILTIN_VPCOMNED,
28702 IX86_BUILTIN_VPCOMLTD,
28703 IX86_BUILTIN_VPCOMLED,
28704 IX86_BUILTIN_VPCOMGTD,
28705 IX86_BUILTIN_VPCOMGED,
28706 IX86_BUILTIN_VPCOMFALSED,
28707 IX86_BUILTIN_VPCOMTRUED,
28709 IX86_BUILTIN_VPCOMEQQ,
28710 IX86_BUILTIN_VPCOMNEQ,
28711 IX86_BUILTIN_VPCOMLTQ,
28712 IX86_BUILTIN_VPCOMLEQ,
28713 IX86_BUILTIN_VPCOMGTQ,
28714 IX86_BUILTIN_VPCOMGEQ,
28715 IX86_BUILTIN_VPCOMFALSEQ,
28716 IX86_BUILTIN_VPCOMTRUEQ,
28718 /* LWP instructions. */
28719 IX86_BUILTIN_LLWPCB,
28720 IX86_BUILTIN_SLWPCB,
28721 IX86_BUILTIN_LWPVAL32,
28722 IX86_BUILTIN_LWPVAL64,
28723 IX86_BUILTIN_LWPINS32,
28724 IX86_BUILTIN_LWPINS64,
28726 IX86_BUILTIN_CLZS,
28728 /* RTM */
28729 IX86_BUILTIN_XBEGIN,
28730 IX86_BUILTIN_XEND,
28731 IX86_BUILTIN_XABORT,
28732 IX86_BUILTIN_XTEST,
28734 /* BMI instructions. */
28735 IX86_BUILTIN_BEXTR32,
28736 IX86_BUILTIN_BEXTR64,
28737 IX86_BUILTIN_CTZS,
28739 /* TBM instructions. */
28740 IX86_BUILTIN_BEXTRI32,
28741 IX86_BUILTIN_BEXTRI64,
28743 /* BMI2 instructions. */
28744 IX86_BUILTIN_BZHI32,
28745 IX86_BUILTIN_BZHI64,
28746 IX86_BUILTIN_PDEP32,
28747 IX86_BUILTIN_PDEP64,
28748 IX86_BUILTIN_PEXT32,
28749 IX86_BUILTIN_PEXT64,
28751 /* ADX instructions. */
28752 IX86_BUILTIN_ADDCARRYX32,
28753 IX86_BUILTIN_ADDCARRYX64,
28755 /* FSGSBASE instructions. */
28756 IX86_BUILTIN_RDFSBASE32,
28757 IX86_BUILTIN_RDFSBASE64,
28758 IX86_BUILTIN_RDGSBASE32,
28759 IX86_BUILTIN_RDGSBASE64,
28760 IX86_BUILTIN_WRFSBASE32,
28761 IX86_BUILTIN_WRFSBASE64,
28762 IX86_BUILTIN_WRGSBASE32,
28763 IX86_BUILTIN_WRGSBASE64,
28765 /* RDRND instructions. */
28766 IX86_BUILTIN_RDRAND16_STEP,
28767 IX86_BUILTIN_RDRAND32_STEP,
28768 IX86_BUILTIN_RDRAND64_STEP,
28770 /* RDSEED instructions. */
28771 IX86_BUILTIN_RDSEED16_STEP,
28772 IX86_BUILTIN_RDSEED32_STEP,
28773 IX86_BUILTIN_RDSEED64_STEP,
28775 /* F16C instructions. */
28776 IX86_BUILTIN_CVTPH2PS,
28777 IX86_BUILTIN_CVTPH2PS256,
28778 IX86_BUILTIN_CVTPS2PH,
28779 IX86_BUILTIN_CVTPS2PH256,
28781 /* CFString built-in for darwin */
28782 IX86_BUILTIN_CFSTRING,
28784 /* Builtins to get CPU type and supported features. */
28785 IX86_BUILTIN_CPU_INIT,
28786 IX86_BUILTIN_CPU_IS,
28787 IX86_BUILTIN_CPU_SUPPORTS,
28789 /* Read/write FLAGS register built-ins. */
28790 IX86_BUILTIN_READ_FLAGS,
28791 IX86_BUILTIN_WRITE_FLAGS,
28793   IX86_BUILTIN_MAX
28794 };
28796 /* Table for the ix86 builtin decls.  */
28797 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
28799 /* Table of all of the builtin functions that are possible with different ISAs,
28800    but which are waiting to be built until a function is declared that uses
28801    that ISA.  */
28802 struct builtin_isa {
28803 const char *name; /* function name */
28804 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
28805 HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
28806 bool const_p; /* true if the declaration is constant */
28807   bool set_and_not_built_p;	/* true while the builtin is deferred and its decl not yet built */
28808 };
28810 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
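/* A minimal sketch of the two states an entry in these tables can be in
   (the builtin name and enum value used here are hypothetical, purely for
   illustration; the real entries are registered by the def_builtin calls
   further below):

     def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_example",
		  V8SI_FTYPE_V8SI_V8SI, IX86_BUILTIN_EXAMPLE);
	 If AVX2 is not currently enabled and the front end has no
	 ext-scope hook, this only records name/tcode/isa here and sets
	 set_and_not_built_p; no decl is built yet.

     ix86_add_new_builtins (OPTION_MASK_ISA_AVX2);
	 Once the ISA becomes available, this builds the deferred decl and
	 clears set_and_not_built_p.  */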
28813 /* Add an ix86 target builtin function with CODE, NAME and TYPE.  Save the
28814    MASK of isa_flags in the ix86_builtins_isa array.  Store the function decl
28815    in the ix86_builtins array.  Return the function decl, or NULL_TREE if the
28816    builtin was not added.
28818    If the front end has a special hook for builtin functions, delay adding
28819    builtin functions that aren't in the current ISA until the ISA is changed
28820    via function-specific optimization options.  Doing so can save about 300K
28821    for the default compiler.  When the builtin is expanded, check at that
28822    time whether it is valid.
28824    If the front end doesn't have a special hook, record all builtins, even
28825    ones that aren't in the current ISA, in case the user uses function-specific
28826    options for a different ISA, so that we don't get scope errors if a builtin
28827    is added in the middle of a function scope.  */
28829 static inline tree
28830 def_builtin (HOST_WIDE_INT mask, const char *name,
28831 	     enum ix86_builtin_func_type tcode,
28832 	     enum ix86_builtins code)
28833 {
28834   tree decl = NULL_TREE;
28836   if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
28837     {
28838       ix86_builtins_isa[(int) code].isa = mask;
28840       mask &= ~OPTION_MASK_ISA_64BIT;
28841       if (mask == 0
28842 	  || (mask & ix86_isa_flags) != 0
28843 	  || (lang_hooks.builtin_function
28844 	      == lang_hooks.builtin_function_ext_scope))
28846 	{
28847 	  tree type = ix86_get_builtin_func_type (tcode);
28848 	  decl = add_builtin_function (name, type, code, BUILT_IN_MD,
28849 				       NULL, NULL_TREE);
28850 	  ix86_builtins[(int) code] = decl;
28851 	  ix86_builtins_isa[(int) code].set_and_not_built_p = false;
28852 	}
28853       else
28854 	{
28855 	  ix86_builtins[(int) code] = NULL_TREE;
28856 	  ix86_builtins_isa[(int) code].tcode = tcode;
28857 	  ix86_builtins_isa[(int) code].name = name;
28858 	  ix86_builtins_isa[(int) code].const_p = false;
28859 	  ix86_builtins_isa[(int) code].set_and_not_built_p = true;
28860 	}
28861     }
28863   return decl;
28864 }
28866 /* Like def_builtin, but also marks the function decl "const". */
28868 static inline tree
28869 def_builtin_const (HOST_WIDE_INT mask, const char *name,
28870 		   enum ix86_builtin_func_type tcode, enum ix86_builtins code)
28871 {
28872   tree decl = def_builtin (mask, name, tcode, code);
28873   if (decl)
28874     TREE_READONLY (decl) = 1;
28875   else
28876     ix86_builtins_isa[(int) code].const_p = true;
28878   return decl;
28879 }
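/* Usage sketch (the name and code below are hypothetical, for illustration
   only): the entries in bdesc_args further down are typically registered
   through this wrapper, so that calls to them can be CSEd:

     def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_example_paddd128",
			V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_EXAMPLE_PADDD128);

   When the decl is deferred, only const_p is recorded here; TREE_READONLY is
   applied later by ix86_add_new_builtins.  */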
28881 /* Add any new builtin functions for a given ISA that may not have been
28882    declared.  This saves a bit of space compared to adding all of the
28883    declarations to the tree up front, even when they are never used.  */
28885 static void
28886 ix86_add_new_builtins (HOST_WIDE_INT isa)
28887 {
28888   int i;
28890   for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
28891     {
28892       if ((ix86_builtins_isa[i].isa & isa) != 0
28893 	  && ix86_builtins_isa[i].set_and_not_built_p)
28894 	{
28895 	  tree decl, type;
28897 	  /* Don't define the builtin again.  */
28898 	  ix86_builtins_isa[i].set_and_not_built_p = false;
28900 	  type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
28901 	  decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
28902 						 type, i, BUILT_IN_MD, NULL,
28903 						 NULL_TREE);
28905 	  ix86_builtins[i] = decl;
28906 	  if (ix86_builtins_isa[i].const_p)
28907 	    TREE_READONLY (decl) = 1;
28908 	}
28909     }
28910 }
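/* Sketch of the intended call pattern (simplified; isa_flags_changed is a
   hypothetical placeholder for however the caller detects that the set of
   enabled ISAs has grown, e.g. after an __attribute__((target ("...")))
   switch):

     if (isa_flags_changed)
       ix86_add_new_builtins (ix86_isa_flags);

   Entries whose ISA bit is now present and which are still marked
   set_and_not_built_p get their decls built at this point.  */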
28912 /* Bits for builtin_description.flag. */
28914 /* Set when we don't support the comparison natively, and should swap the
28915    comparison operands in order to support it.  */
28916 #define BUILTIN_DESC_SWAP_OPERANDS 1
28918 struct builtin_description
28919 {
28920   const HOST_WIDE_INT mask;
28921   const enum insn_code icode;
28922   const char *const name;
28923   const enum ix86_builtins code;
28924   const enum rtx_code comparison;
28925   const int flag;
28926 };
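/* A minimal sketch of how BUILTIN_DESC_SWAP_OPERANDS is meant to be honoured
   by an expander walking these tables (simplified; d, op0 and op1 stand for
   the table entry and the two operands already extracted from the call):

     if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
       {
	 rtx tmp = op0;
	 op0 = op1;
	 op1 = tmp;
       }

   so a comparison that the insn pattern only accepts in one operand order
   can still be exposed as a builtin.  */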
28928 static const struct builtin_description bdesc_comi[] =
28929 {
28930 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
28931 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
28932 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
28933 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
28934 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
28935 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
28936 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
28937 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
28938 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
28939 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
28940 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
28941 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
28942 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
28943 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
28944 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
28945 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
28946 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
28947 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
28948 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
28949 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
28950 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
28951 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
28952 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
28953   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
28954 };
28956 static const struct builtin_description bdesc_pcmpestr[] =
28957 {
28958 /* SSE4.2 */
28959 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
28960 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
28961 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
28962 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
28963 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
28964 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
28965   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
28966 };
28968 static const struct builtin_description bdesc_pcmpistr[] =
28969 {
28970 /* SSE4.2 */
28971 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
28972 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
28973 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
28974 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
28975 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
28976 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
28977   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
28978 };
28980 /* Special builtins with variable number of arguments.  */
28981 static const struct builtin_description bdesc_special_args[] =
28982 {
28983 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
28984 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
28985 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
28987 /* 80387 (for use internally for atomic compound assignment). */
28988 { 0, CODE_FOR_fnstenv, "__builtin_ia32_fnstenv", IX86_BUILTIN_FNSTENV, UNKNOWN, (int) VOID_FTYPE_PVOID },
28989 { 0, CODE_FOR_fldenv, "__builtin_ia32_fldenv", IX86_BUILTIN_FLDENV, UNKNOWN, (int) VOID_FTYPE_PCVOID },
28990 { 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKNOWN, (int) VOID_FTYPE_PUSHORT },
28991 { 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, UNKNOWN, (int) VOID_FTYPE_VOID },
28993 /* MMX */
28994 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
28996 /* 3DNow! */
28997 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
28999 /* FXSR, XSAVE, XSAVEOPT, XSAVEC and XSAVES. */
29000 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID },
29001 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID },
29002 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29003 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29004 { OPTION_MASK_ISA_XSAVEOPT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29005 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xsaves", IX86_BUILTIN_XSAVES, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29006 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xrstors", IX86_BUILTIN_XRSTORS, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29007 { OPTION_MASK_ISA_XSAVEC, CODE_FOR_nothing, "__builtin_ia32_xsavec", IX86_BUILTIN_XSAVEC, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29009 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID },
29010 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID },
29011 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29012 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29013 { OPTION_MASK_ISA_XSAVEOPT | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29014 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaves64", IX86_BUILTIN_XSAVES64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29015 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstors64", IX86_BUILTIN_XRSTORS64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29016 { OPTION_MASK_ISA_XSAVEC | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsavec64", IX86_BUILTIN_XSAVEC64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29018 /* SSE */
29019 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
29020 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
29021 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
29023 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
29024 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
29025 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
29026 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
29028 /* SSE or 3DNow!A */
29029 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
29030 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
29032 /* SSE2 */
29033 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
29034 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
29035 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
29036 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedquv16qi, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
29037 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
29038 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
29039 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
29040 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
29041 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
29042 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddquv16qi, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
29044 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
29045 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
29047 /* SSE3 */
29048 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
29050 /* SSE4.1 */
29051 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
29053 /* SSE4A */
29054 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
29055 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
29057 /* AVX */
29058 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
29059 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
29061 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
29062 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
29063 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
29064 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
29065 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
29067 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
29068 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
29069 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
29070 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
29071 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddquv32qi, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
29072 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedquv32qi, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
29073 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
29075 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
29076 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
29077 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
29079 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
29080 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
29081 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
29082 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
29083 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
29084 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
29085 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
29086 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
29088 /* AVX2 */
29089 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
29090 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
29091 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
29092 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
29093 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
29094 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
29095 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
29096 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
29097 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
29099 /* AVX512F */
29100 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29101 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29102 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29103 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29104 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29105 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29106 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29107 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29108 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29109 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29110 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29111 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29112 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29113 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29114 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadupd512_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29115 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadups512_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29116 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29117 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29118 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29119 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29120 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF },
29121 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF },
29122 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI },
29123 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI },
29124 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29125 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29126 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeupd512_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29127 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask_store, "__builtin_ia32_pmovusqd512mem_mask", IX86_BUILTIN_PMOVUSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29128 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask_store, "__builtin_ia32_pmovsqd512mem_mask", IX86_BUILTIN_PMOVSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29129 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask_store, "__builtin_ia32_pmovqd512mem_mask", IX86_BUILTIN_PMOVQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29130 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovusqw512mem_mask", IX86_BUILTIN_PMOVUSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29131 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovsqw512mem_mask", IX86_BUILTIN_PMOVSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29132 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovqw512mem_mask", IX86_BUILTIN_PMOVQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29133 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovusdw512mem_mask", IX86_BUILTIN_PMOVUSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29134 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovsdw512mem_mask", IX86_BUILTIN_PMOVSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29135 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovdw512mem_mask", IX86_BUILTIN_PMOVDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29136 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovqb512mem_mask", IX86_BUILTIN_PMOVQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29137 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovusqb512mem_mask", IX86_BUILTIN_PMOVUSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29138 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovsqb512mem_mask", IX86_BUILTIN_PMOVSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29139 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29140 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29141 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29142 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeups512_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29143 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29144 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29145 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29146 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29148 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
29149 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
29150 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
29151 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
29152 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
29153 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
29155 /* FSGSBASE */
29156 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29157 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
29158 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29159 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
29160 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
29161 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
29162 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
29163 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
29165 /* RTM */
29166 { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29167 { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
29168   { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
29169 };
29171 /* Builtins with variable number of arguments.  */
29172 static const struct builtin_description bdesc_args[] =
29173 {
29174 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
29175 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
29176 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
29177 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
29178 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
29179 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
29180 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
29182 /* MMX */
29183 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29184 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29185 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29186 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29187 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29188 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29190 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29191 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29192 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29193 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29194 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29195 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29196 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29197 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29199 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29200 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29202 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29203 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29204 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29205 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29207 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29208 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29209 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29210 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29211 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29212 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29214 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29215 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29216 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29217 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29218 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
29219 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
29221 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
29222 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
29223 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
29225 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
29227 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29228 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29229 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
29230 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29231 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29232 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
29234 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29235 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29236 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
29237 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29238 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29239 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
29241 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29242 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29243 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29244 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29246 /* 3DNow! */
29247 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
29248 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
29249 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29250 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29252 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29253 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29254 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29255 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29256 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29257 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29258 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29259 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29260 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29261 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29262 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29263 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29264 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29265 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29266 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29268 /* 3DNow!A */
29269 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
29270 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
29271 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
29272 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29273 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29274 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29276 /* SSE */
29277 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
29278 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29279 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29280 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29281 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29282 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29283 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
29284 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
29285 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
29286 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
29287 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
29288 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
29290 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29292 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29293 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29294 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29295 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29296 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29297 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29298 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29299 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
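   /* Illustrative only: how one of these rows surfaces to users.  The
      <xmmintrin.h> wrapper (attributes abbreviated)

	 extern __inline __m128
	 _mm_add_ps (__m128 __A, __m128 __B)
	 {
	   return (__m128) __builtin_ia32_addps ((__v4sf)__A, (__v4sf)__B);
	 }

      resolves to the IX86_BUILTIN_ADDPS entry above, which
      ix86_expand_args_builtin expands through CODE_FOR_addv4sf3.  */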
29301 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
29302 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
29303 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
29304 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29305 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29306 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29307 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
29308 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
29309 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
29310 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29311 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29312 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29313 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
29314 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
29315 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
29316 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29317 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
29318 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
29319 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
29320 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
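   /* The compare builtins above share one maskcmp pattern; the rtx code
      column selects the condition.  The ..._SWAP flag asks the expander
      to swap the two operands, which is how cmpgt/cmpge are emitted as
      LT/LE, and the "not" forms use the unordered codes UNGE/UNGT so
      that NaN operands satisfy them.  */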
29322 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29323 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29324 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29325 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29327 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29328 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29329 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29330 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29332 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29334 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29335 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29336 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29337 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29338 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29340 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
29341 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
29342 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
29344 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
29346 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
29347 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
29348 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
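   /* ..._VEC_MERGE: these scalar sqrt/rsqrt/rcp builtins take a single
      source vector; the expander supplies it both as the operand of the
      operation and as the vec_merge source that preserves the upper
      elements.  */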
29350 { OPTION_MASK_ISA_SSE, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
29351 { OPTION_MASK_ISA_SSE, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
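   /* A zero name (FABSQ/COPYSIGNQ above) means no builtin is declared
      from this row; the declaration is made explicitly elsewhere and only
      the expansion information here is used.  */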
29353 /* SSE MMX or 3DNow!A */
29354 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29355 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29356 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29358 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29359 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29360 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29361 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29363 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
29364 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
29366 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
29368 /* SSE2 */
29369 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29371 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
29372 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
29373 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
29374 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
29375 { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
29377 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
29378 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
29379 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
29380 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
29381 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
29383 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
29385 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
29386 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
29387 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
29388 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
29390 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_fix_notruncv4sfv4si, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
29391 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
29392 { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
29394 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29395 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29396 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29397 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29398 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29399 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29400 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29401 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29403 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
29404 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
29405 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
29406 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29407 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29408 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29409 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
29410 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
29411 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
29412 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29413 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29414 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29415 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
29416 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
29417 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
29418 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29419 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
29420 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
29421 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
29422 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29424 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29425 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29426 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29427 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29429 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29430 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29431 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29432 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29434 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29436 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29437 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29438 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29440 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
29442 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29443 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29444 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29445 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29446 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29447 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29448 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29449 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29451 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29452 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29453 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29454 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29455 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29456 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29457 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29458 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29460 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29461 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29463 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29464 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29465 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29466 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29468 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29469 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29471 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29472 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29473 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29474 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29475 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29476 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29478 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29479 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29480 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29481 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29483 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29484 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29485 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29486 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29487 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29488 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29489 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29490 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29492 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
29493 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
29494 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
29496 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29497 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
29499 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
29500 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
29502 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
29504 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
29505 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
29506 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
29507 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
29509 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
29510 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29511 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29512 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
29513 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29514 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
29515 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
29517 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
29518 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29519 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29520 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
29521 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29522 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
29523 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
29525 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29526 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29527 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29528 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
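   /* Shift flag variants: ..._SI_COUNT rows take the shift count as an
      integer immediate/register, ..._V*_COUNT rows take it in a vector
      register, and the whole-register byte shifts (pslldq/psrldq) use
      ..._INT_CONVERT because the V1TImode pattern treats the operand as
      a single 128-bit value.  */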
29530 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
29531 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
29532 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
29534 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
29536 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
29538 /* SSE2 MMX */
29539 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
29540 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
29542 /* SSE3 */
29543 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29544 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29546 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29547 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29548 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29549 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29550 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29551 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29553 /* SSSE3 */
29554 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
29555 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
29556 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
29557 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
29558 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
29559 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
29561 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29562 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29563 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29564 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29565 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29566 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29567 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29568 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29569 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29570 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29571 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29572 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29573 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
29574 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
29575 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29576 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29577 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29578 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29579 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29580 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29581 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29582 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29583 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29584 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29586 /* SSSE3. */
29587 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
29588 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
29590 /* SSE4.1 */
29591 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29592 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29593 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
29594 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
29595 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29596 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29597 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29598 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
29599 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
29600 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
29602 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
29603 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
29604 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
29605 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
29606 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
29607 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
29608 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
29609 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
29610 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
29611 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
29612 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
29613 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
29614 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
29616 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
29617 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29618 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29619 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29620 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29621 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29622 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29623 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29624 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29625 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29626 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
29627 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29629 /* SSE4.1 */
29630 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
29631 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
29632 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29633 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29635 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
29636 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
29637 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
29638 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
29640 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
29641 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
29643 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
29644 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
29646 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
29647 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
29648 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
29649 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
29651 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
29652 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },
29654 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29655 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
29657 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
29658 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
29659 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
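   /* For the rounding builtins above the comparison column is reused to
      carry the ROUND_* mode ((enum rtx_code) ROUND_FLOOR etc.), since no
      rtx comparison is involved; it ends up as the rounding-control
      immediate of the round pattern.  The ptest rows instead use
      EQ/LTU/GTU to select which flag the result tests (ZF for ptestz,
      CF for ptestc, neither set for ptestnzc).  */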
29661 /* SSE4.2 */
29662 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29663 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
29664 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
29665 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29666 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29668 /* SSE4A */
29669 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
29670 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
29671 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
29672 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29674 /* AES */
29675 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
29676 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
29678 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29679 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29680 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29681 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29683 /* PCLMUL */
29684 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
29686 /* AVX */
29687 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29688 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29689 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29690 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29691 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29692 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29693 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29694 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29695 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29696 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29697 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29698 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29699 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29700 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29701 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29702 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29703 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29704 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29705 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29706 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29707 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29708 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29709 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29710 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29711 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29712 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29714 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
29715 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
29716 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
29717 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
29719 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29720 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29721 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
29722 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
29723 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29724 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29725 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29726 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29727 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29728 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29729 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29730 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29731 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29732 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
29733 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
29734 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
29735 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
29736 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
29737 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
29738 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_fix_notruncv8sfv8si, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
29739 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
29740 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
29741 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
29742 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
29743 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29744 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29745 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
29746 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
29747 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
29748 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
29749 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
29750 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
29751 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
29752 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
29754 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29755 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29756 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
29758 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
29759 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29760 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29761 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29762 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29764 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29766 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
29767 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
29769 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
29770 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
29771 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
29772 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
29774 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
29775 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
29777 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
29778 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
29780 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
29781 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
29782 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
29783 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
29785 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
29786 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },
29788 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29789 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
29791 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29792 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29793 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29794 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29796 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
29797 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
29798 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
29799 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
29800 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
29801 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
29803 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
29804 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
29805 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
29806 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
29807 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
29808 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
29809 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
29810 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
29811 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
29812 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
29813 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
29814 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
29815 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
29816 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
29817 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
29819 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
29820 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
29822 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29823 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29825 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256 ", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
29827 /* AVX2 */
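  /* 256-bit integer SIMD builtins: packing, saturating add/sub, compares,
     shuffles, broadcasts, sign/zero extension, and both immediate- and
     variable-count shifts, operating on V32QI/V16HI/V8SI/V4DI modes.  */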
29828 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
29829 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
29830 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
29831 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
29832 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
29833 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
29834 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
29835 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
29836 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29837 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29838 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29839 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29840 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29841 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29842 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29843 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29844 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
29845 { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29846 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29847 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29848 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29849 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
29850 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
29851 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29852 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29853 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29854 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29855 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29856 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29857 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29858 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29859 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29860 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29861 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29862 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29863 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29864 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29865 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
29866 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
29867 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29868 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29869 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3 , "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29870 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29871 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29872 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3 , "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29873 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29874 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29875 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3 , "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29876 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29877 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29878 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3 , "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29879 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
29880 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
29881 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2 , "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
29882 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2 , "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
29883 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2 , "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
29884 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2 , "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
29885 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2 , "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
29886 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
29887 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2 , "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
29888 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2 , "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
29889 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2 , "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
29890 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2 , "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
29891 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2 , "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
29892 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_even_v8si, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
29893 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmulhrswv16hi3 , "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29894 { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29895 { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29896 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29897 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256 , UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29898 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
29899 { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29900 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
29901 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29902 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
29903 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
29904 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
29905 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29906 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29907 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3 , "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29908 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
29909 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
29910 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
29911 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
29912 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
29913 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
29914 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
29915 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
29916 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
29917 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
29918 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
29919 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
29920 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
29921 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
29922 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
29923 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
29924 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
29925 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
29926 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29927 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29928 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29929 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29930 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29931 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29932 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29933 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29934 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29935 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29936 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29937 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29938 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29939 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29940 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29941 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29942 { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29943 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29944 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
29945 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
29946 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
29947 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
29948 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
29949 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
29950 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
29951 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
29952 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
29953 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
29954 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
29955 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
29956 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
29957 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29958 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
29959 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
29960 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
29961 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
29962 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_extracti128, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
29963 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_inserti128, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
29964 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29965 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29966 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29967 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29968 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29969 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29970 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29971 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29972 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29973 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
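  /* LZCNT (16-bit count-leading-zeros __builtin_clzs).  */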
29975 { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
29977 /* BMI */
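  /* Bit-manipulation builtins: BEXTR bit-field extract in 32- and 64-bit
     forms, plus the 16-bit count-trailing-zeros __builtin_ctzs.  */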
29978 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29979 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29980 { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
29982 /* TBM */
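  /* AMD trailing-bit-manipulation: BEXTRI, the immediate-operand form of
     bit-field extract, in 32- and 64-bit variants.  */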
29983 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29984 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29986 /* F16C */
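  /* Half-precision conversions: VCVTPH2PS widens packed 16-bit floats to
     single precision and VCVTPS2PH narrows back, in 128- and 256-bit forms.  */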
29987 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
29988 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
29989 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
29990 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
29992 /* BMI2 */
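  /* BZHI (zero high bits from a given index), PDEP (parallel bit deposit)
     and PEXT (parallel bit extract), each in 32- and 64-bit forms.  */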
29993 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29994 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29995 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29996 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29997 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29998 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
30000 /* AVX512F */
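  /* 512-bit builtins.  Most entries are "_mask" forms: the trailing
     prototype operands (e.g. ..._V16SI_HI, ..._V8DF_QI) are the pass-through
     source vector and the write mask, HImode for 16-element and QImode for
     8-element vectors.  */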
30001 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
30002 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
30003 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30004 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30005 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30006 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30007 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
30008 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_QI },
30009 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
30010 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_QI },
30011 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
30012 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
30013 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
30014 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
30015 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30016 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30017 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
30018 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtps2ph512_mask, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_HI },
30019 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv8siv8df_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
30020 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT },
30021 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30022 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30023 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30024 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30025 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_QI },
30026 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_QI },
30027 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_QI },
30028 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_QI },
30029 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI },
30030 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI },
30031 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI },
30032 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI },
30033 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30034 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30035 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30036 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30037 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30038 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30039 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30040 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30041 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30042 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30043 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30044 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30045 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30046 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30047 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30048 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
30049 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_HI },
30050 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_QI },
30051 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_HI },
30052 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
30053 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
30054 { OPTION_MASK_ISA_AVX512F & ~OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_memv8di_mask, "__builtin_ia32_pbroadcastq512_mem_mask", IX86_BUILTIN_PBROADCASTQ512_MEM, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
30055 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30056 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30057 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30058 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30059 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30060 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30061 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30062 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30063 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30064 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30065 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30066 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30067 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30068 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30069 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30070 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30071 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30072 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30073 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
30074 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
30075 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
30076 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
30077 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
30078 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
30079 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
30080 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
30081 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
30082 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
30083 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
30084 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
30085 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
30086 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, "__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
30087 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
30088 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
30089 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
30090 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
30091 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
30092 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
30093 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
30094 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
30095 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
30096 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
30097 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
30098 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
30099 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask" , IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30100 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
30101 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30102 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30103 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30104 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30105 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30106 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30107 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30108 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30109 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30110 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30111 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30112 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
30113 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30114 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
30115 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30116 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30117 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30118 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
30119 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30120 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
30121 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30122 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30123 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30124 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
30125 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30126 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
30127 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30128 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30129 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30130 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30131 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30132 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30133 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30134 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30135 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30136 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30137 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30138 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30139 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30140 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30141 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30142 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30143 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30144 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v2df, "__builtin_ia32_rcp14sd", IX86_BUILTIN_RCP14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30145 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v4sf, "__builtin_ia32_rcp14ss", IX86_BUILTIN_RCP14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30146 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30147 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30148 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v2df, "__builtin_ia32_rsqrt14sd", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30149 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v4sf, "__builtin_ia32_rsqrt14ss", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30150 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
30151 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
30152 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
30153 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
30154 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
30155 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
30156 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
30157 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
30158 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
30159 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
30160 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
30161 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
30162 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30163 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30164 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30165 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30166 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
30167 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30168 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30169 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30170 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30171 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30172 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
30173 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
30174 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30175 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30176 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30177 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30178 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
30179 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
30180 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
30181 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
30182 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30183 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30184 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30185 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30186 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30187 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30188 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
30189 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
30190 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
30191 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
30193 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF },
30194 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF },
30195 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF },
30196 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF },
30197 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf, "__builtin_ia32_exp2ps", IX86_BUILTIN_EXP2PS, UNKNOWN, (int) V16SF_FTYPE_V16SF },
30198 { OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF },
30199 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
30200 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
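/* Illustrative sketch, not part of the table above: the "_mask" entries take
   the two source operands, a pass-through vector and a write mask, which is
   what signatures such as V16SI_FTYPE_V16SI_V16SI_V16SI_HI describe.  For
   example, the <immintrin.h> wrapper _mm512_mask_xor_epi32 reduces to
   __builtin_ia32_pxord512_mask; a minimal use (assuming -mavx512f):  */

#include <immintrin.h>

static __m512i
masked_xor (__m512i src, __mmask16 k, __m512i a, __m512i b)
{
  /* Lanes whose mask bit is clear keep their value from SRC.  */
  return _mm512_mask_xor_epi32 (src, k, a, b);
}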
30202 /* Mask arithmetic operations */
30203 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andhi3, "__builtin_ia32_kandhi", IX86_BUILTIN_KAND16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30204 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kandnhi, "__builtin_ia32_kandnhi", IX86_BUILTIN_KANDN16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30205 { OPTION_MASK_ISA_AVX512F, CODE_FOR_one_cmplhi2, "__builtin_ia32_knothi", IX86_BUILTIN_KNOT16, UNKNOWN, (int) HI_FTYPE_HI },
30206 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorhi3, "__builtin_ia32_korhi", IX86_BUILTIN_KOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30207 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestchi, "__builtin_ia32_kortestchi", IX86_BUILTIN_KORTESTC16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30208 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestzhi, "__builtin_ia32_kortestzhi", IX86_BUILTIN_KORTESTZ16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30209 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kunpckhi, "__builtin_ia32_kunpckhi", IX86_BUILTIN_KUNPCKBW, UNKNOWN, (int) HI_FTYPE_HI_HI },
30210 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kxnorhi, "__builtin_ia32_kxnorhi", IX86_BUILTIN_KXNOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30211 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorhi3, "__builtin_ia32_kxorhi", IX86_BUILTIN_KXOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30212 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kmovw, "__builtin_ia32_kmov16", IX86_BUILTIN_KMOV16, UNKNOWN, (int) HI_FTYPE_HI },
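/* Illustrative sketch, not part of the table above: the HImode entries here
   back the 16-bit AVX-512 mask operations; the __mmask16 wrappers in
   <immintrin.h> (e.g. _mm512_kand, _mm512_knot) reduce to these builtins.
   A minimal direct use (assuming -mavx512f):  */

typedef unsigned short mask16;   /* same representation as __mmask16 */

static mask16
combine_masks (mask16 a, mask16 b)
{
  mask16 both = __builtin_ia32_kandhi (a, b);   /* KANDW */
  return __builtin_ia32_knothi (both);          /* KNOTW */
}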
30214 /* SHA */
30215 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg1, 0, IX86_BUILTIN_SHA1MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30216 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg2, 0, IX86_BUILTIN_SHA1MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30217 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1nexte, 0, IX86_BUILTIN_SHA1NEXTE, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30218 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1rnds4, 0, IX86_BUILTIN_SHA1RNDS4, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
30219 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg1, 0, IX86_BUILTIN_SHA256MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30220 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg2, 0, IX86_BUILTIN_SHA256MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30221 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256rnds2, 0, IX86_BUILTIN_SHA256RNDS2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
30222 };
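/* Illustrative sketch, not part of the table above: the SHA entries are
   reached through the wrappers in <immintrin.h>; e.g. _mm_sha256rnds2_epu32
   maps to CODE_FOR_sha256rnds2.  Two SHA-256 rounds, assuming -msha:  */

#include <immintrin.h>

static __m128i
sha256_two_rounds (__m128i cdgh, __m128i abef, __m128i wk)
{
  /* WK holds the next two message words already added to their round
     constants; the result is the updated (A,B,E,F) state.  */
  return _mm_sha256rnds2_epu32 (cdgh, abef, wk);
}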
30224 /* Builtins with rounding support. */
30225 static const struct builtin_description bdesc_round_args[] =
30226 {
30227 /* AVX512F */
30228 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30229 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30230 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmaddv2df3_round, "__builtin_ia32_addsd_round", IX86_BUILTIN_ADDSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30231 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmaddv4sf3_round, "__builtin_ia32_addss_round", IX86_BUILTIN_ADDSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30232 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) QI_FTYPE_V8DF_V8DF_INT_QI_INT },
30233 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) HI_FTYPE_V16SF_V16SF_INT_HI_INT },
30234 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv2df3_mask_round, "__builtin_ia32_cmpsd_mask", IX86_BUILTIN_CMPSD_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI_INT },
30235 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv4sf3_mask_round, "__builtin_ia32_cmpss_mask", IX86_BUILTIN_CMPSS_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI_INT },
30236 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_comi_round, "__builtin_ia32_vcomisd", IX86_BUILTIN_COMIDF, UNKNOWN, (int) INT_FTYPE_V2DF_V2DF_INT_INT },
30237 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_comi_round, "__builtin_ia32_vcomiss", IX86_BUILTIN_COMISF, UNKNOWN, (int) INT_FTYPE_V4SF_V4SF_INT_INT },
30238 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
30239 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30240 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT },
30241 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv8dfv8si_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30242 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT },
30243 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30244 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT },
30245 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30246 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT },
30247 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT },
30248 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_cvtsi2ss32", IX86_BUILTIN_CVTSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_INT },
30249 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT },
30250 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT },
30251 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30252 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30253 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30254 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30255 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
30256 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT },
30257 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT },
30258 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT },
30259 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30260 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30261 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmdivv2df3_round, "__builtin_ia32_divsd_round", IX86_BUILTIN_DIVSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30262 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmdivv4sf3_round, "__builtin_ia32_divss_round", IX86_BUILTIN_DIVSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30263 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
30264 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
30265 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
30266 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
30267 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_mask_round, "__builtin_ia32_fixupimmsd_mask", IX86_BUILTIN_FIXUPIMMSD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
30268 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_maskz_round, "__builtin_ia32_fixupimmsd_maskz", IX86_BUILTIN_FIXUPIMMSD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
30269 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_mask_round, "__builtin_ia32_fixupimmss_mask", IX86_BUILTIN_FIXUPIMMSS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
30270 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_maskz_round, "__builtin_ia32_fixupimmss_maskz", IX86_BUILTIN_FIXUPIMMSS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
30271 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30272 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30273 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv2df_round, "__builtin_ia32_getexpsd128_round", IX86_BUILTIN_GETEXPSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30274 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv4sf_round, "__builtin_ia32_getexpss128_round", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30275 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
30276 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
30277 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
30278 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
30279 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30280 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30281 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30282 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsmaxv4sf3_round, "__builtin_ia32_maxss_round", IX86_BUILTIN_MAXSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30283 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30284 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30285 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsminv2df3_round, "__builtin_ia32_minsd_round", IX86_BUILTIN_MINSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30286 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsminv4sf3_round, "__builtin_ia32_minss_round", IX86_BUILTIN_MINSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30287 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30288 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30289 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmmulv2df3_round, "__builtin_ia32_mulsd_round", IX86_BUILTIN_MULSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30290 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmmulv4sf3_round, "__builtin_ia32_mulss_round", IX86_BUILTIN_MULSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30291 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
30292 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
30293 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev2df_round, "__builtin_ia32_rndscalesd_round", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
30294 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev4sf_round, "__builtin_ia32_rndscaless_round", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
30295 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30296 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30297 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv2df_round, "__builtin_ia32_scalefsd_round", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30298 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv4sf_round, "__builtin_ia32_scalefss_round", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30299 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30300 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30301 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsqrtv2df2_round, "__builtin_ia32_sqrtsd_round", IX86_BUILTIN_SQRTSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30302 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsqrtv4sf2_round, "__builtin_ia32_sqrtss_round", IX86_BUILTIN_SQRTSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30303 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30304 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30305 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsubv2df3_round, "__builtin_ia32_subsd_round", IX86_BUILTIN_SUBSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30306 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsubv4sf3_round, "__builtin_ia32_subss_round", IX86_BUILTIN_SUBSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30307 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2si_round, "__builtin_ia32_vcvtsd2si32", IX86_BUILTIN_VCVTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
30308 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq_round, "__builtin_ia32_vcvtsd2si64", IX86_BUILTIN_VCVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
30309 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtsd2usi_round, "__builtin_ia32_vcvtsd2usi32", IX86_BUILTIN_VCVTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
30310 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtsd2usiq_round, "__builtin_ia32_vcvtsd2usi64", IX86_BUILTIN_VCVTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
30311 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtss2si_round, "__builtin_ia32_vcvtss2si32", IX86_BUILTIN_VCVTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
30312 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq_round, "__builtin_ia32_vcvtss2si64", IX86_BUILTIN_VCVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
30313 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtss2usi_round, "__builtin_ia32_vcvtss2usi32", IX86_BUILTIN_VCVTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
30314 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtss2usiq_round, "__builtin_ia32_vcvtss2usi64", IX86_BUILTIN_VCVTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
30315 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvttsd2si_round, "__builtin_ia32_vcvttsd2si32", IX86_BUILTIN_VCVTTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
30316 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq_round, "__builtin_ia32_vcvttsd2si64", IX86_BUILTIN_VCVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
30317 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttsd2usi_round, "__builtin_ia32_vcvttsd2usi32", IX86_BUILTIN_VCVTTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
30318 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttsd2usiq_round, "__builtin_ia32_vcvttsd2usi64", IX86_BUILTIN_VCVTTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
30319 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvttss2si_round, "__builtin_ia32_vcvttss2si32", IX86_BUILTIN_VCVTTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
30320 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq_round, "__builtin_ia32_vcvttss2si64", IX86_BUILTIN_VCVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
30321 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttss2usi_round, "__builtin_ia32_vcvttss2usi32", IX86_BUILTIN_VCVTTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
30322 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttss2usiq_round, "__builtin_ia32_vcvttss2usi64", IX86_BUILTIN_VCVTTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
30323 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30324 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30325 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30326 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30327 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30328 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30329 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v2df_round, "__builtin_ia32_vfmaddsd3_round", IX86_BUILTIN_VFMADDSD3_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_INT },
30330 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v4sf_round, "__builtin_ia32_vfmaddss3_round", IX86_BUILTIN_VFMADDSS3_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT },
30331 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30332 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30333 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30334 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30335 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30336 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30337 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30338 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30339 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30340 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30341 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30342 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30343 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30344 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30345 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30346 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30348 /* AVX512ER */
30349 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v8df_mask_round, "__builtin_ia32_exp2pd_mask", IX86_BUILTIN_EXP2PD_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30350 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf_mask_round, "__builtin_ia32_exp2ps_mask", IX86_BUILTIN_EXP2PS_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30351 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v8df_mask_round, "__builtin_ia32_rcp28pd_mask", IX86_BUILTIN_RCP28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30352 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v16sf_mask_round, "__builtin_ia32_rcp28ps_mask", IX86_BUILTIN_RCP28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30353 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v2df_round, "__builtin_ia32_rcp28sd_round", IX86_BUILTIN_RCP28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30354 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v4sf_round, "__builtin_ia32_rcp28ss_round", IX86_BUILTIN_RCP28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30355 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v8df_mask_round, "__builtin_ia32_rsqrt28pd_mask", IX86_BUILTIN_RSQRT28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30356 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v16sf_mask_round, "__builtin_ia32_rsqrt28ps_mask", IX86_BUILTIN_RSQRT28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30357 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v2df_round, "__builtin_ia32_rsqrt28sd_round", IX86_BUILTIN_RSQRT28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30358 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30359 };
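/* Illustrative sketch, not part of the table above: the trailing _INT in the
   bdesc_round_args signatures is the embedded rounding/SAE immediate.  The
   *_round_* wrappers in <immintrin.h> pass it through; e.g.
   _mm512_add_round_pd reduces to __builtin_ia32_addpd512_mask above
   (assuming -mavx512f):  */

#include <immintrin.h>

static __m512d
add_toward_zero (__m512d a, __m512d b)
{
  /* Round toward zero and suppress exceptions for this operation only,
     independently of the MXCSR rounding mode.  */
  return _mm512_add_round_pd (a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
}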
30361 /* FMA4 and XOP. */
30362 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
30363 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
30364 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
30365 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
30366 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
30367 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
30368 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
30369 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
30370 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
30371 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
30372 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
30373 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
30374 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
30375 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
30376 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
30377 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
30378 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
30379 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
30380 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
30381 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
30382 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
30383 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
30384 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
30385 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
30386 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
30387 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
30388 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
30389 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
30390 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
30391 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
30392 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
30393 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
30394 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
30395 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
30396 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
30397 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
30398 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
30399 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
30400 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
30401 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
30402 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
30403 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
30404 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
30405 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
30406 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
30407 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
30408 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
30409 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
30410 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
30411 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
30412 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
30413 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
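/* Illustrative sketch, not part of the definitions above: each MULTI_ARG_*
   alias renames one of the V*_FTYPE_* signatures and so fixes the C
   prototype of the builtins registered below.  E.g. MULTI_ARG_3_SF
   (V4SF_FTYPE_V4SF_V4SF_V4SF) makes __builtin_ia32_vfmaddss a three-operand
   float-vector builtin (assuming -mfma4):  */

typedef float v4sf __attribute__ ((vector_size (16)));

static v4sf
fma_low_element (v4sf a, v4sf b, v4sf c)
{
  /* Computes a[0] * b[0] + c[0] in the low element.  */
  return __builtin_ia32_vfmaddss (a, b, c);
}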
30415 static const struct builtin_description bdesc_multi_arg[] =
30416 {
30417 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
30418 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
30419 UNKNOWN, (int)MULTI_ARG_3_SF },
30420 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
30421 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
30422 UNKNOWN, (int)MULTI_ARG_3_DF },
30424 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
30425 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
30426 UNKNOWN, (int)MULTI_ARG_3_SF },
30427 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
30428 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
30429 UNKNOWN, (int)MULTI_ARG_3_DF },
30431 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
30432 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
30433 UNKNOWN, (int)MULTI_ARG_3_SF },
30434 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
30435 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
30436 UNKNOWN, (int)MULTI_ARG_3_DF },
30437 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
30438 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
30439 UNKNOWN, (int)MULTI_ARG_3_SF2 },
30440 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
30441 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
30442 UNKNOWN, (int)MULTI_ARG_3_DF2 },
30444 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
30445 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
30446 UNKNOWN, (int)MULTI_ARG_3_SF },
30447 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
30448 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
30449 UNKNOWN, (int)MULTI_ARG_3_DF },
30450 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
30451 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
30452 UNKNOWN, (int)MULTI_ARG_3_SF2 },
30453 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
30454 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
30455 UNKNOWN, (int)MULTI_ARG_3_DF2 },
30457 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
30458 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
30459 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
30460 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
30461 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi",IX86_BUILTIN_VPCMOV_V16QI,UNKNOWN, (int)MULTI_ARG_3_QI },
30462 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
30463 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
30465 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
30466 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
30467 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
30468 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
30469 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
30470 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
30471 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
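/* Illustrative sketch, not part of the table above: VPCMOV is a bit-wise
   select, dst = (a & sel) | (b & ~sel), wrapped by _mm_cmov_si128 in
   <x86intrin.h>.  A direct use of the generic 128-bit form (assuming
   -mxop):  */

typedef long long v2di __attribute__ ((vector_size (16)));

static v2di
bitwise_select (v2di a, v2di b, v2di sel)
{
  /* Bits set in SEL are taken from A, clear bits from B.  */
  return __builtin_ia32_vpcmov (a, b, sel);
}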
30473 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
30475 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
30476 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
30477 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30478 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30479 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
30480 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
30481 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30482 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30483 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30484 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30485 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30486 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30488 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30489 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
30490 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
30491 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
30492 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
30493 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
30494 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
30495 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
30496 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30497 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
30498 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
30499 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
30500 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30501 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
30502 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
30503 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
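/* Illustrative sketch, not part of the table above: the VPROT* entries
   rotate each element left by a per-element count (negative counts rotate
   right), and the *_IMM forms take one immediate count for all elements.
   A direct use (assuming -mxop):  */

typedef int v4si __attribute__ ((vector_size (16)));

static v4si
rotate_each_left_7 (v4si x)
{
  return __builtin_ia32_vprotdi (x, 7);   /* VPROTD with an immediate count */
}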
30505 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_1_SF },
30506 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_1_DF },
30507 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
30508 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
30509 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
30510 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
30512 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30513 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
30514 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
30515 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30516 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
30517 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30518 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30519 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
30520 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
30521 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30522 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
30523 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30524 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30525 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30526 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30528 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
30529 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
30530 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
30531 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
30532 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
30533 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
30534 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
30536 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
30537 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
30538 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
30539 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
30540 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
30541 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
30542 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
30544 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
30545 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
30546 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
30547 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
30548 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
30549 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
30550 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
30552 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
30553 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
30554 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
30555 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
30556 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
30557 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
30558 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
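/* Illustrative sketch, not part of the table above: each VPCOM entry bakes
   its comparison code (EQ, NE, LT, ...) into the expander, so the builtin
   itself takes only the two vectors and returns an all-ones/all-zeros
   element mask.  A direct use (assuming -mxop):  */

typedef int v4si __attribute__ ((vector_size (16)));

static v4si
less_than_mask (v4si a, v4si b)
{
  return __builtin_ia32_vpcomltd (a, b);   /* -1 where a[i] < b[i], else 0 */
}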
30560 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
30561 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
30562 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
30563 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
30564 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
30565 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
30566 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
30568 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
30569 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
30570 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
30571 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
30572 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
30573 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
30574 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
30576 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
30577 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
30578 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
30579 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
30580 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
30581 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
30582 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
30584 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
30585 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
30586 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
30587 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
30588 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
30589 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
30590 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
30592 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
30593 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
30594 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
30595 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
30596 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub", IX86_BUILTIN_VPCOMFALSEUB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
30597 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw", IX86_BUILTIN_VPCOMFALSEUW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
30598 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud", IX86_BUILTIN_VPCOMFALSEUD, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
30599 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq", IX86_BUILTIN_VPCOMFALSEUQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
30601 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
30602 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
30603 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
30604 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
30605 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
30606 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
30607 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
30608 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
30610 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
30611 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
30612 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
30613 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
30617 /* TM vector builtins. */
30619 /* Reuse the existing x86-specific `struct builtin_description' because
30620 we're lazy. Add casts to make them fit. */
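/* Each entry below is also made directly callable under its name with the
   "__builtin_" prefix stripped, e.g. _ITM_WM128 for the 128-bit
   transactional write; see ix86_init_tm_builtins, which registers these
   names to match the libitm entry points.  The example is illustrative of
   the naming scheme only. */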
30621 static const struct builtin_description bdesc_tm[] =
30623 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30624 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30625 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30626 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30627 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30628 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30629 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30631 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30632 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30633 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30634 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30635 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30636 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30637 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30639 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30640 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30641 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30642 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30643 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30644 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30645 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30647 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
30648 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
30649 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
30652 /* TM callbacks. */
30654 /* Return the builtin decl needed to load a vector of TYPE. */
30656 static tree
30657 ix86_builtin_tm_load (tree type)
30659 if (TREE_CODE (type) == VECTOR_TYPE)
30661 switch (tree_to_uhwi (TYPE_SIZE (type)))
30663 case 64:
30664 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
30665 case 128:
30666 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
30667 case 256:
30668 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
30671 return NULL_TREE;
30674 /* Return the builtin decl needed to store a vector of TYPE. */
30676 static tree
30677 ix86_builtin_tm_store (tree type)
30679 if (TREE_CODE (type) == VECTOR_TYPE)
30681 switch (tree_to_uhwi (TYPE_SIZE (type)))
30683 case 64:
30684 return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
30685 case 128:
30686 return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
30687 case 256:
30688 return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
30691 return NULL_TREE;
30694 /* Initialize the transactional memory vector load/store builtins. */
30696 static void
30697 ix86_init_tm_builtins (void)
30699 enum ix86_builtin_func_type ftype;
30700 const struct builtin_description *d;
30701 size_t i;
30702 tree decl;
30703 tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
30704 tree attrs_log, attrs_type_log;
30706 if (!flag_tm)
30707 return;
30709 /* If there are no builtins defined, we must be compiling in a
30710 language without trans-mem support. */
30711 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
30712 return;
30714 /* Use whatever attributes a normal TM load has. */
30715 decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
30716 attrs_load = DECL_ATTRIBUTES (decl);
30717 attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30718 /* Use whatever attributes a normal TM store has. */
30719 decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
30720 attrs_store = DECL_ATTRIBUTES (decl);
30721 attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30722 /* Use whatever attributes a normal TM log has. */
30723 decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
30724 attrs_log = DECL_ATTRIBUTES (decl);
30725 attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30727 for (i = 0, d = bdesc_tm;
30728 i < ARRAY_SIZE (bdesc_tm);
30729 i++, d++)
30731 if ((d->mask & ix86_isa_flags) != 0
30732 || (lang_hooks.builtin_function
30733 == lang_hooks.builtin_function_ext_scope))
30735 tree type, attrs, attrs_type;
30736 enum built_in_function code = (enum built_in_function) d->code;
30738 ftype = (enum ix86_builtin_func_type) d->flag;
30739 type = ix86_get_builtin_func_type (ftype);
30741 if (BUILTIN_TM_LOAD_P (code))
30743 attrs = attrs_load;
30744 attrs_type = attrs_type_load;
30746 else if (BUILTIN_TM_STORE_P (code))
30748 attrs = attrs_store;
30749 attrs_type = attrs_type_store;
30751 else
30753 attrs = attrs_log;
30754 attrs_type = attrs_type_log;
30756 decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
30757 /* The builtin without the prefix for
30758 calling it directly. */
30759 d->name + strlen ("__builtin_"),
30760 attrs);
30761 /* add_builtin_function() will set the DECL_ATTRIBUTES, now
30762 set the TYPE_ATTRIBUTES. */
30763 decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
30765 set_builtin_decl (code, decl, false);
30770 /* Set up all the MMX/SSE builtins, even builtins for instructions that are
30771 not in the current target ISA, so that the user can compile particular
30772 modules with target-specific options that differ from the command-line
30773 options. */
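/* A rough illustration with hypothetical user code (not part of this
   file): even if a translation unit is compiled with plain -msse2, a
   single function in it may be declared

     __attribute__ ((target ("avx2"))) void f (void);

   and use AVX2 intrinsics.  The builtins behind those intrinsics must
   therefore already be registered; whether the active ISA actually allows
   them is checked later, when the builtin is expanded. */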
30774 static void
30775 ix86_init_mmx_sse_builtins (void)
30777 const struct builtin_description * d;
30778 enum ix86_builtin_func_type ftype;
30779 size_t i;
30781 /* Add all special builtins with variable number of operands. */
30782 for (i = 0, d = bdesc_special_args;
30783 i < ARRAY_SIZE (bdesc_special_args);
30784 i++, d++)
30786 if (d->name == 0)
30787 continue;
30789 ftype = (enum ix86_builtin_func_type) d->flag;
30790 def_builtin (d->mask, d->name, ftype, d->code);
30793 /* Add all builtins with variable number of operands. */
30794 for (i = 0, d = bdesc_args;
30795 i < ARRAY_SIZE (bdesc_args);
30796 i++, d++)
30798 if (d->name == 0)
30799 continue;
30801 ftype = (enum ix86_builtin_func_type) d->flag;
30802 def_builtin_const (d->mask, d->name, ftype, d->code);
30805 /* Add all builtins with rounding. */
30806 for (i = 0, d = bdesc_round_args;
30807 i < ARRAY_SIZE (bdesc_round_args);
30808 i++, d++)
30810 if (d->name == 0)
30811 continue;
30813 ftype = (enum ix86_builtin_func_type) d->flag;
30814 def_builtin_const (d->mask, d->name, ftype, d->code);
30817 /* pcmpestr[im] insns. */
30818 for (i = 0, d = bdesc_pcmpestr;
30819 i < ARRAY_SIZE (bdesc_pcmpestr);
30820 i++, d++)
30822 if (d->code == IX86_BUILTIN_PCMPESTRM128)
30823 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
30824 else
30825 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
30826 def_builtin_const (d->mask, d->name, ftype, d->code);
30829 /* pcmpistr[im] insns. */
30830 for (i = 0, d = bdesc_pcmpistr;
30831 i < ARRAY_SIZE (bdesc_pcmpistr);
30832 i++, d++)
30834 if (d->code == IX86_BUILTIN_PCMPISTRM128)
30835 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
30836 else
30837 ftype = INT_FTYPE_V16QI_V16QI_INT;
30838 def_builtin_const (d->mask, d->name, ftype, d->code);
30841 /* comi/ucomi insns. */
30842 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
30844 if (d->mask == OPTION_MASK_ISA_SSE2)
30845 ftype = INT_FTYPE_V2DF_V2DF;
30846 else
30847 ftype = INT_FTYPE_V4SF_V4SF;
30848 def_builtin_const (d->mask, d->name, ftype, d->code);
30851 /* SSE */
30852 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
30853 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
30854 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
30855 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
30857 /* SSE or 3DNow!A */
30858 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
30859 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
30860 IX86_BUILTIN_MASKMOVQ);
30862 /* SSE2 */
30863 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
30864 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
30866 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
30867 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
30868 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
30869 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
30871 /* SSE3. */
30872 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
30873 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
30874 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
30875 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
30877 /* AES */
30878 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
30879 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
30880 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
30881 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
30882 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
30883 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
30884 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
30885 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
30886 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
30887 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
30888 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
30889 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
30891 /* PCLMUL */
30892 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
30893 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
30895 /* RDRND */
30896 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
30897 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
30898 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
30899 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
30900 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
30901 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
30902 IX86_BUILTIN_RDRAND64_STEP);
30904 /* AVX2 */
30905 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
30906 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
30907 IX86_BUILTIN_GATHERSIV2DF);
30909 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
30910 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
30911 IX86_BUILTIN_GATHERSIV4DF);
30913 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
30914 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
30915 IX86_BUILTIN_GATHERDIV2DF);
30917 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
30918 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
30919 IX86_BUILTIN_GATHERDIV4DF);
30921 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
30922 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
30923 IX86_BUILTIN_GATHERSIV4SF);
30925 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
30926 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
30927 IX86_BUILTIN_GATHERSIV8SF);
30929 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
30930 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
30931 IX86_BUILTIN_GATHERDIV4SF);
30933 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
30934 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
30935 IX86_BUILTIN_GATHERDIV8SF);
30937 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
30938 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
30939 IX86_BUILTIN_GATHERSIV2DI);
30941 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
30942 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
30943 IX86_BUILTIN_GATHERSIV4DI);
30945 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
30946 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
30947 IX86_BUILTIN_GATHERDIV2DI);
30949 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
30950 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
30951 IX86_BUILTIN_GATHERDIV4DI);
30953 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
30954 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
30955 IX86_BUILTIN_GATHERSIV4SI);
30957 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
30958 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
30959 IX86_BUILTIN_GATHERSIV8SI);
30961 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
30962 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
30963 IX86_BUILTIN_GATHERDIV4SI);
30965 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
30966 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
30967 IX86_BUILTIN_GATHERDIV8SI);
30969 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
30970 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
30971 IX86_BUILTIN_GATHERALTSIV4DF);
30973 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
30974 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
30975 IX86_BUILTIN_GATHERALTDIV8SF);
30977 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
30978 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
30979 IX86_BUILTIN_GATHERALTSIV4DI);
30981 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
30982 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
30983 IX86_BUILTIN_GATHERALTDIV8SI);
30985 /* AVX512F */
30986 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf",
30987 V16SF_FTYPE_V16SF_PCFLOAT_V16SI_HI_INT,
30988 IX86_BUILTIN_GATHER3SIV16SF);
30990 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df",
30991 V8DF_FTYPE_V8DF_PCDOUBLE_V8SI_QI_INT,
30992 IX86_BUILTIN_GATHER3SIV8DF);
30994 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf",
30995 V8SF_FTYPE_V8SF_PCFLOAT_V8DI_QI_INT,
30996 IX86_BUILTIN_GATHER3DIV16SF);
30998 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df",
30999 V8DF_FTYPE_V8DF_PCDOUBLE_V8DI_QI_INT,
31000 IX86_BUILTIN_GATHER3DIV8DF);
31002 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si",
31003 V16SI_FTYPE_V16SI_PCINT_V16SI_HI_INT,
31004 IX86_BUILTIN_GATHER3SIV16SI);
31006 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di",
31007 V8DI_FTYPE_V8DI_PCINT64_V8SI_QI_INT,
31008 IX86_BUILTIN_GATHER3SIV8DI);
31010 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si",
31011 V8SI_FTYPE_V8SI_PCINT_V8DI_QI_INT,
31012 IX86_BUILTIN_GATHER3DIV16SI);
31014 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di",
31015 V8DI_FTYPE_V8DI_PCINT64_V8DI_QI_INT,
31016 IX86_BUILTIN_GATHER3DIV8DI);
31018 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df ",
31019 V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
31020 IX86_BUILTIN_GATHER3ALTSIV8DF);
31022 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf ",
31023 V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
31024 IX86_BUILTIN_GATHER3ALTDIV16SF);
31026 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di ",
31027 V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
31028 IX86_BUILTIN_GATHER3ALTSIV8DI);
31030 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si ",
31031 V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
31032 IX86_BUILTIN_GATHER3ALTDIV16SI);
31034 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf",
31035 VOID_FTYPE_PFLOAT_HI_V16SI_V16SF_INT,
31036 IX86_BUILTIN_SCATTERSIV16SF);
31038 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df",
31039 VOID_FTYPE_PDOUBLE_QI_V8SI_V8DF_INT,
31040 IX86_BUILTIN_SCATTERSIV8DF);
31042 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf",
31043 VOID_FTYPE_PFLOAT_QI_V8DI_V8SF_INT,
31044 IX86_BUILTIN_SCATTERDIV16SF);
31046 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df",
31047 VOID_FTYPE_PDOUBLE_QI_V8DI_V8DF_INT,
31048 IX86_BUILTIN_SCATTERDIV8DF);
31050 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si",
31051 VOID_FTYPE_PINT_HI_V16SI_V16SI_INT,
31052 IX86_BUILTIN_SCATTERSIV16SI);
31054 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di",
31055 VOID_FTYPE_PLONGLONG_QI_V8SI_V8DI_INT,
31056 IX86_BUILTIN_SCATTERSIV8DI);
31058 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si",
31059 VOID_FTYPE_PINT_QI_V8DI_V8SI_INT,
31060 IX86_BUILTIN_SCATTERDIV16SI);
31062 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8di",
31063 VOID_FTYPE_PLONGLONG_QI_V8DI_V8DI_INT,
31064 IX86_BUILTIN_SCATTERDIV8DI);
31066 /* AVX512PF */
31067 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
31068 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
31069 IX86_BUILTIN_GATHERPFDPD);
31070 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
31071 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
31072 IX86_BUILTIN_GATHERPFDPS);
31073 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqpd",
31074 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
31075 IX86_BUILTIN_GATHERPFQPD);
31076 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
31077 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
31078 IX86_BUILTIN_GATHERPFQPS);
31079 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdpd",
31080 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
31081 IX86_BUILTIN_SCATTERPFDPD);
31082 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
31083 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
31084 IX86_BUILTIN_SCATTERPFDPS);
31085 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqpd",
31086 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
31087 IX86_BUILTIN_SCATTERPFQPD);
31088 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
31089 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
31090 IX86_BUILTIN_SCATTERPFQPS);
31092 /* SHA */
31093 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1",
31094 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
31095 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2",
31096 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
31097 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1nexte",
31098 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE);
31099 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1rnds4",
31100 V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4);
31101 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg1",
31102 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1);
31103 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg2",
31104 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2);
31105 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256rnds2",
31106 V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2);
31108 /* RTM. */
31109 def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
31110 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
31112 /* MMX access to the vec_init patterns. */
31113 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
31114 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
31116 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
31117 V4HI_FTYPE_HI_HI_HI_HI,
31118 IX86_BUILTIN_VEC_INIT_V4HI);
31120 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
31121 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
31122 IX86_BUILTIN_VEC_INIT_V8QI);
31124 /* Access to the vec_extract patterns. */
31125 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
31126 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
31127 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
31128 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
31129 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
31130 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
31131 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
31132 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
31133 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
31134 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
31136 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
31137 "__builtin_ia32_vec_ext_v4hi",
31138 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
31140 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
31141 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
31143 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
31144 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
31146 /* Access to the vec_set patterns. */
31147 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
31148 "__builtin_ia32_vec_set_v2di",
31149 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
31151 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
31152 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
31154 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
31155 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
31157 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
31158 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
31160 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
31161 "__builtin_ia32_vec_set_v4hi",
31162 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
31164 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
31165 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
31167 /* RDSEED */
31168 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
31169 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
31170 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
31171 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
31172 def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
31173 "__builtin_ia32_rdseed_di_step",
31174 INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
31176 /* ADCX */
31177 def_builtin (0, "__builtin_ia32_addcarryx_u32",
31178 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
31179 def_builtin (OPTION_MASK_ISA_64BIT,
31180 "__builtin_ia32_addcarryx_u64",
31181 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
31182 IX86_BUILTIN_ADDCARRYX64);
31184 /* Read/write FLAGS. */
31185 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u32",
31186 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
31187 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u64",
31188 UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
31189 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u32",
31190 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS);
31191 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u64",
31192 VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS);
31194 /* CLFLUSHOPT. */
31195 def_builtin (OPTION_MASK_ISA_CLFLUSHOPT, "__builtin_ia32_clflushopt",
31196 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSHOPT);
31198 /* Add FMA4 multi-arg argument instructions */
31199 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
31201 if (d->name == 0)
31202 continue;
31204 ftype = (enum ix86_builtin_func_type) d->flag;
31205 def_builtin_const (d->mask, d->name, ftype, d->code);
31209 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
31210 to return a pointer to VERSION_DECL if the outcome of the expression
31211 formed by PREDICATE_CHAIN is true. This function will be called during
31212 version dispatch to decide which function version to execute. It returns
31213 the basic block at the end, to which more conditions can be added. */
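/* An illustrative sketch of what gets appended for one version (names
   made up; the predicates are the __builtin_cpu_is /
   __builtin_cpu_supports builtins):

       cond = __builtin_cpu_supports ("avx2");
       if (cond > 0) goto bb2; else goto bb3;
     bb2:
       return (void *) &foo.avx2;
     bb3:
       <returned, so the caller can chain the next condition here>  */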
31215 static basic_block
31216 add_condition_to_bb (tree function_decl, tree version_decl,
31217 tree predicate_chain, basic_block new_bb)
31219 gimple return_stmt;
31220 tree convert_expr, result_var;
31221 gimple convert_stmt;
31222 gimple call_cond_stmt;
31223 gimple if_else_stmt;
31225 basic_block bb1, bb2, bb3;
31226 edge e12, e23;
31228 tree cond_var, and_expr_var = NULL_TREE;
31229 gimple_seq gseq;
31231 tree predicate_decl, predicate_arg;
31233 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
31235 gcc_assert (new_bb != NULL);
31236 gseq = bb_seq (new_bb);
31239 convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
31240 build_fold_addr_expr (version_decl));
31241 result_var = create_tmp_var (ptr_type_node, NULL);
31242 convert_stmt = gimple_build_assign (result_var, convert_expr);
31243 return_stmt = gimple_build_return (result_var);
31245 if (predicate_chain == NULL_TREE)
31247 gimple_seq_add_stmt (&gseq, convert_stmt);
31248 gimple_seq_add_stmt (&gseq, return_stmt);
31249 set_bb_seq (new_bb, gseq);
31250 gimple_set_bb (convert_stmt, new_bb);
31251 gimple_set_bb (return_stmt, new_bb);
31252 pop_cfun ();
31253 return new_bb;
31256 while (predicate_chain != NULL)
31258 cond_var = create_tmp_var (integer_type_node, NULL);
31259 predicate_decl = TREE_PURPOSE (predicate_chain);
31260 predicate_arg = TREE_VALUE (predicate_chain);
31261 call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
31262 gimple_call_set_lhs (call_cond_stmt, cond_var);
31264 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
31265 gimple_set_bb (call_cond_stmt, new_bb);
31266 gimple_seq_add_stmt (&gseq, call_cond_stmt);
31268 predicate_chain = TREE_CHAIN (predicate_chain);
31270 if (and_expr_var == NULL)
31271 and_expr_var = cond_var;
31272 else
31274 gimple assign_stmt;
31275 /* Use MIN_EXPR to and the conditions together: the result is nonzero
31276 only if every one is, i.e. and_expr_var = min_expr <cond_var, and_expr_var>. */
31277 assign_stmt = gimple_build_assign (and_expr_var,
31278 build2 (MIN_EXPR, integer_type_node,
31279 cond_var, and_expr_var));
31281 gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
31282 gimple_set_bb (assign_stmt, new_bb);
31283 gimple_seq_add_stmt (&gseq, assign_stmt);
31287 if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
31288 integer_zero_node,
31289 NULL_TREE, NULL_TREE);
31290 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
31291 gimple_set_bb (if_else_stmt, new_bb);
31292 gimple_seq_add_stmt (&gseq, if_else_stmt);
31294 gimple_seq_add_stmt (&gseq, convert_stmt);
31295 gimple_seq_add_stmt (&gseq, return_stmt);
31296 set_bb_seq (new_bb, gseq);
31298 bb1 = new_bb;
31299 e12 = split_block (bb1, if_else_stmt);
31300 bb2 = e12->dest;
31301 e12->flags &= ~EDGE_FALLTHRU;
31302 e12->flags |= EDGE_TRUE_VALUE;
31304 e23 = split_block (bb2, return_stmt);
31306 gimple_set_bb (convert_stmt, bb2);
31307 gimple_set_bb (return_stmt, bb2);
31309 bb3 = e23->dest;
31310 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
31312 remove_edge (e23);
31313 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
31315 pop_cfun ();
31317 return bb3;
31320 /* This parses the attribute arguments to target in DECL and determines
31321 the right builtin to use to match the platform specification.
31322 It returns the priority value for this version decl. If PREDICATE_LIST
31323 is not NULL, it stores the list of cpu features that need to be checked
31324 before dispatching this function. */
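/* For example, a version declared in user code as

     __attribute__ ((target ("sse4.2"))) int foo (void);

   (illustrative) yields a single-element predicate list calling
   __builtin_cpu_supports ("sse4.2") and the priority P_SSE4_2, while the
   "default" version gets priority zero and no predicates. */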
31326 static unsigned int
31327 get_builtin_code_for_version (tree decl, tree *predicate_list)
31329 tree attrs;
31330 struct cl_target_option cur_target;
31331 tree target_node;
31332 struct cl_target_option *new_target;
31333 const char *arg_str = NULL;
31334 const char *attrs_str = NULL;
31335 char *tok_str = NULL;
31336 char *token;
31338 /* Priority of i386 features, greater value is higher priority. This is
31339 used to decide the order in which function dispatch must happen. For
31340 instance, a version specialized for SSE4.2 should be checked for dispatch
31341 before a version for SSE3, as SSE4.2 implies SSE3. */
31342 enum feature_priority
31344 P_ZERO = 0,
31345 P_MMX,
31346 P_SSE,
31347 P_SSE2,
31348 P_SSE3,
31349 P_SSSE3,
31350 P_PROC_SSSE3,
31351 P_SSE4_A,
31352 P_PROC_SSE4_A,
31353 P_SSE4_1,
31354 P_SSE4_2,
31355 P_PROC_SSE4_2,
31356 P_POPCNT,
31357 P_AVX,
31358 P_PROC_AVX,
31359 P_FMA4,
31360 P_XOP,
31361 P_PROC_XOP,
31362 P_FMA,
31363 P_PROC_FMA,
31364 P_AVX2,
31365 P_PROC_AVX2
31368 enum feature_priority priority = P_ZERO;
31370 /* These are the target attribute strings for which a dispatcher is
31371 available, from fold_builtin_cpu. */
31373 static struct _feature_list
31375 const char *const name;
31376 const enum feature_priority priority;
31378 const feature_list[] =
31380 {"mmx", P_MMX},
31381 {"sse", P_SSE},
31382 {"sse2", P_SSE2},
31383 {"sse3", P_SSE3},
31384 {"sse4a", P_SSE4_A},
31385 {"ssse3", P_SSSE3},
31386 {"sse4.1", P_SSE4_1},
31387 {"sse4.2", P_SSE4_2},
31388 {"popcnt", P_POPCNT},
31389 {"avx", P_AVX},
31390 {"fma4", P_FMA4},
31391 {"xop", P_XOP},
31392 {"fma", P_FMA},
31393 {"avx2", P_AVX2}
31397 static unsigned int NUM_FEATURES
31398 = sizeof (feature_list) / sizeof (struct _feature_list);
31400 unsigned int i;
31402 tree predicate_chain = NULL_TREE;
31403 tree predicate_decl, predicate_arg;
31405 attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
31406 gcc_assert (attrs != NULL);
31408 attrs = TREE_VALUE (TREE_VALUE (attrs));
31410 gcc_assert (TREE_CODE (attrs) == STRING_CST);
31411 attrs_str = TREE_STRING_POINTER (attrs);
31413 /* Return priority zero for default function. */
31414 if (strcmp (attrs_str, "default") == 0)
31415 return 0;
31417 /* Handle arch= if specified. For priority, set it to be 1 more than
31418 the best instruction set the processor can handle. For instance, if
31419 there is a version for atom and a version for ssse3 (the highest ISA
31420 priority for atom), the atom version must be checked for dispatch
31421 before the ssse3 version. */
31422 if (strstr (attrs_str, "arch=") != NULL)
31424 cl_target_option_save (&cur_target, &global_options);
31425 target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
31426 &global_options_set);
31428 gcc_assert (target_node);
31429 new_target = TREE_TARGET_OPTION (target_node);
31430 gcc_assert (new_target);
31432 if (new_target->arch_specified && new_target->arch > 0)
31434 switch (new_target->arch)
31436 case PROCESSOR_CORE2:
31437 arg_str = "core2";
31438 priority = P_PROC_SSSE3;
31439 break;
31440 case PROCESSOR_NEHALEM:
31441 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES)
31442 arg_str = "westmere";
31443 else
31444 /* We translate "arch=corei7" and "arch=nehalem" to
31445 "corei7" so that it will be mapped to M_INTEL_COREI7
31446 as cpu type to cover all M_INTEL_COREI7_XXXs. */
31447 arg_str = "corei7";
31448 priority = P_PROC_SSE4_2;
31449 break;
31450 case PROCESSOR_SANDYBRIDGE:
31451 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C)
31452 arg_str = "ivybridge";
31453 else
31454 arg_str = "sandybridge";
31455 priority = P_PROC_AVX;
31456 break;
31457 case PROCESSOR_HASWELL:
31458 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX)
31459 arg_str = "broadwell";
31460 else
31461 arg_str = "haswell";
31462 priority = P_PROC_AVX2;
31463 break;
31464 case PROCESSOR_BONNELL:
31465 arg_str = "bonnell";
31466 priority = P_PROC_SSSE3;
31467 break;
31468 case PROCESSOR_SILVERMONT:
31469 arg_str = "silvermont";
31470 priority = P_PROC_SSE4_2;
31471 break;
31472 case PROCESSOR_AMDFAM10:
31473 arg_str = "amdfam10h";
31474 priority = P_PROC_SSE4_A;
31475 break;
31476 case PROCESSOR_BTVER1:
31477 arg_str = "btver1";
31478 priority = P_PROC_SSE4_A;
31479 break;
31480 case PROCESSOR_BTVER2:
31481 arg_str = "btver2";
31482 priority = P_PROC_AVX;
31483 break;
31484 case PROCESSOR_BDVER1:
31485 arg_str = "bdver1";
31486 priority = P_PROC_XOP;
31487 break;
31488 case PROCESSOR_BDVER2:
31489 arg_str = "bdver2";
31490 priority = P_PROC_FMA;
31491 break;
31492 case PROCESSOR_BDVER3:
31493 arg_str = "bdver3";
31494 priority = P_PROC_FMA;
31495 break;
31496 case PROCESSOR_BDVER4:
31497 arg_str = "bdver4";
31498 priority = P_PROC_AVX2;
31499 break;
31503 cl_target_option_restore (&global_options, &cur_target);
31505 if (predicate_list && arg_str == NULL)
31507 error_at (DECL_SOURCE_LOCATION (decl),
31508 "No dispatcher found for the versioning attributes");
31509 return 0;
31512 if (predicate_list)
31514 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
31515 /* For a C string literal the length includes the trailing NUL. */
31516 predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
31517 predicate_chain = tree_cons (predicate_decl, predicate_arg,
31518 predicate_chain);
31522 /* Process feature name. */
31523 tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
31524 strcpy (tok_str, attrs_str);
31525 token = strtok (tok_str, ",");
31526 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];
31528 while (token != NULL)
31530 /* Do not process "arch=" */
31531 if (strncmp (token, "arch=", 5) == 0)
31533 token = strtok (NULL, ",");
31534 continue;
31536 for (i = 0; i < NUM_FEATURES; ++i)
31538 if (strcmp (token, feature_list[i].name) == 0)
31540 if (predicate_list)
31542 predicate_arg = build_string_literal (
31543 strlen (feature_list[i].name) + 1,
31544 feature_list[i].name);
31545 predicate_chain = tree_cons (predicate_decl, predicate_arg,
31546 predicate_chain);
31548 /* Find the maximum priority feature. */
31549 if (feature_list[i].priority > priority)
31550 priority = feature_list[i].priority;
31552 break;
31555 if (predicate_list && i == NUM_FEATURES)
31557 error_at (DECL_SOURCE_LOCATION (decl),
31558 "No dispatcher found for %s", token);
31559 return 0;
31561 token = strtok (NULL, ",");
31563 free (tok_str);
31565 if (predicate_list && predicate_chain == NULL_TREE)
31567 error_at (DECL_SOURCE_LOCATION (decl),
31568 "No dispatcher found for the versioning attributes : %s",
31569 attrs_str);
31570 return 0;
31572 else if (predicate_list)
31574 predicate_chain = nreverse (predicate_chain);
31575 *predicate_list = predicate_chain;
31578 return priority;
31581 /* This compares the priority of target features in function DECL1
31582 and DECL2. It returns positive value if DECL1 is higher priority,
31583 negative value if DECL2 is higher priority and 0 if they are the
31584 same. */
31586 static int
31587 ix86_compare_version_priority (tree decl1, tree decl2)
31589 unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
31590 unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
31592 return (int)priority1 - (int)priority2;
31595 /* V1 and V2 point to function versions with different priorities
31596 based on the target ISA. This function compares their priorities. */
31598 static int
31599 feature_compare (const void *v1, const void *v2)
31601 typedef struct _function_version_info
31603 tree version_decl;
31604 tree predicate_chain;
31605 unsigned int dispatch_priority;
31606 } function_version_info;
31608 const function_version_info c1 = *(const function_version_info *)v1;
31609 const function_version_info c2 = *(const function_version_info *)v2;
31610 return (c2.dispatch_priority - c1.dispatch_priority);
31613 /* This function generates the dispatch function for
31614 multi-versioned functions. DISPATCH_DECL is the function which will
31615 contain the dispatch logic. FNDECLS are the function choices for
31616 dispatch, and is a tree chain. EMPTY_BB is the basic block pointer
31617 in DISPATCH_DECL in which the dispatch code is generated. */
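/* Roughly, for versions of foo tagged "avx2", "avx" and "default", the
   dispatch code built here behaves like (illustrative):

     __builtin_cpu_init ();
     if (__builtin_cpu_supports ("avx2")) return &foo.avx2;
     if (__builtin_cpu_supports ("avx")) return &foo.avx;
     return &foo;   <- the default version is dispatched last  */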
31619 static int
31620 dispatch_function_versions (tree dispatch_decl,
31621 void *fndecls_p,
31622 basic_block *empty_bb)
31624 tree default_decl;
31625 gimple ifunc_cpu_init_stmt;
31626 gimple_seq gseq;
31627 int ix;
31628 tree ele;
31629 vec<tree> *fndecls;
31630 unsigned int num_versions = 0;
31631 unsigned int actual_versions = 0;
31632 unsigned int i;
31634 struct _function_version_info
31636 tree version_decl;
31637 tree predicate_chain;
31638 unsigned int dispatch_priority;
31639 }*function_version_info;
31641 gcc_assert (dispatch_decl != NULL
31642 && fndecls_p != NULL
31643 && empty_bb != NULL);
31645 /* fndecls_p is actually a vector. */
31646 fndecls = static_cast<vec<tree> *> (fndecls_p);
31648 /* At least one more version other than the default. */
31649 num_versions = fndecls->length ();
31650 gcc_assert (num_versions >= 2);
31652 function_version_info = (struct _function_version_info *)
31653 XNEWVEC (struct _function_version_info, (num_versions - 1));
31655 /* The first version in the vector is the default decl. */
31656 default_decl = (*fndecls)[0];
31658 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
31660 gseq = bb_seq (*empty_bb);
31661 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
31662 constructors, so explicitly call __builtin_cpu_init here. */
31663 ifunc_cpu_init_stmt = gimple_build_call_vec (
31664 ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
31665 gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
31666 gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
31667 set_bb_seq (*empty_bb, gseq);
31669 pop_cfun ();
31672 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
31674 tree version_decl = ele;
31675 tree predicate_chain = NULL_TREE;
31676 unsigned int priority;
31677 /* Get attribute string, parse it and find the right predicate decl.
31678 The predicate function could be a lengthy combination of many
31679 features, like arch-type and various isa-variants. */
31680 priority = get_builtin_code_for_version (version_decl,
31681 &predicate_chain);
31683 if (predicate_chain == NULL_TREE)
31684 continue;
31686 function_version_info [actual_versions].version_decl = version_decl;
31687 function_version_info [actual_versions].predicate_chain
31688 = predicate_chain;
31689 function_version_info [actual_versions].dispatch_priority = priority;
31690 actual_versions++;
31693 /* Sort the versions according to descending order of dispatch priority. The
31694 priority is based on the ISA. This is not a perfect solution. There
31695 could still be ambiguity. If more than one function version is suitable
31696 to execute, which one should be dispatched? In future, allow the user
31697 to specify a dispatch priority next to the version. */
31698 qsort (function_version_info, actual_versions,
31699 sizeof (struct _function_version_info), feature_compare);
31701 for (i = 0; i < actual_versions; ++i)
31702 *empty_bb = add_condition_to_bb (dispatch_decl,
31703 function_version_info[i].version_decl,
31704 function_version_info[i].predicate_chain,
31705 *empty_bb);
31707 /* dispatch default version at the end. */
31708 *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
31709 NULL, *empty_bb);
31711 free (function_version_info);
31712 return 0;
31715 /* Comparator function to be used in qsort routine to sort attribute
31716 specification strings to "target". */
31718 static int
31719 attr_strcmp (const void *v1, const void *v2)
31721 const char *c1 = *(char *const*)v1;
31722 const char *c2 = *(char *const*)v2;
31723 return strcmp (c1, c2);
31726 /* ARGLIST is the argument to target attribute. This function tokenizes
31727 the comma separated arguments, sorts them and returns a string which
31728 is a unique identifier for the comma separated arguments. It also
31729 replaces non-identifier characters "=,-" with "_". */
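/* For instance, the argument of __attribute__ ((target ("avx,arch=corei7")))
   is flattened to "avx,arch_corei7", split at the commas, sorted and
   rejoined, giving the identifier "arch_corei7_avx" (illustrative). */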
31731 static char *
31732 sorted_attr_string (tree arglist)
31734 tree arg;
31735 size_t str_len_sum = 0;
31736 char **args = NULL;
31737 char *attr_str, *ret_str;
31738 char *attr = NULL;
31739 unsigned int argnum = 1;
31740 unsigned int i;
31742 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
31744 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
31745 size_t len = strlen (str);
31746 str_len_sum += len + 1;
31747 if (arg != arglist)
31748 argnum++;
31749 for (i = 0; i < strlen (str); i++)
31750 if (str[i] == ',')
31751 argnum++;
31754 attr_str = XNEWVEC (char, str_len_sum);
31755 str_len_sum = 0;
31756 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
31758 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
31759 size_t len = strlen (str);
31760 memcpy (attr_str + str_len_sum, str, len);
31761 attr_str[str_len_sum + len] = TREE_CHAIN (arg) ? ',' : '\0';
31762 str_len_sum += len + 1;
31765 /* Replace "=,-" with "_". */
31766 for (i = 0; i < strlen (attr_str); i++)
31767 if (attr_str[i] == '=' || attr_str[i]== '-')
31768 attr_str[i] = '_';
31770 if (argnum == 1)
31771 return attr_str;
31773 args = XNEWVEC (char *, argnum);
31775 i = 0;
31776 attr = strtok (attr_str, ",");
31777 while (attr != NULL)
31779 args[i] = attr;
31780 i++;
31781 attr = strtok (NULL, ",");
31784 qsort (args, argnum, sizeof (char *), attr_strcmp);
31786 ret_str = XNEWVEC (char, str_len_sum);
31787 str_len_sum = 0;
31788 for (i = 0; i < argnum; i++)
31790 size_t len = strlen (args[i]);
31791 memcpy (ret_str + str_len_sum, args[i], len);
31792 ret_str[str_len_sum + len] = i < argnum - 1 ? '_' : '\0';
31793 str_len_sum += len + 1;
31796 XDELETEVEC (args);
31797 XDELETEVEC (attr_str);
31798 return ret_str;
31801 /* This function changes the assembler name for functions that are
31802 versions. If DECL is a function version and has a "target"
31803 attribute, it appends the attribute string to its assembler name. */
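/* E.g. an "avx,arch=corei7" version of foo ends up with the assembler
   name "foo.arch_corei7_avx" (illustrative; for C++ the suffix is
   appended to the mangled name), while the "default" version keeps its
   original assembler name. */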
31805 static tree
31806 ix86_mangle_function_version_assembler_name (tree decl, tree id)
31808 tree version_attr;
31809 const char *orig_name, *version_string;
31810 char *attr_str, *assembler_name;
31812 if (DECL_DECLARED_INLINE_P (decl)
31813 && lookup_attribute ("gnu_inline",
31814 DECL_ATTRIBUTES (decl)))
31815 error_at (DECL_SOURCE_LOCATION (decl),
31816 "Function versions cannot be marked as gnu_inline,"
31817 " bodies have to be generated");
31819 if (DECL_VIRTUAL_P (decl)
31820 || DECL_VINDEX (decl))
31821 sorry ("Virtual function multiversioning not supported");
31823 version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
31825 /* target attribute string cannot be NULL. */
31826 gcc_assert (version_attr != NULL_TREE);
31828 orig_name = IDENTIFIER_POINTER (id);
31829 version_string
31830 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
31832 if (strcmp (version_string, "default") == 0)
31833 return id;
31835 attr_str = sorted_attr_string (TREE_VALUE (version_attr));
31836 assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
31838 sprintf (assembler_name, "%s.%s", orig_name, attr_str);
31840 /* Allow assembler name to be modified if already set. */
31841 if (DECL_ASSEMBLER_NAME_SET_P (decl))
31842 SET_DECL_RTL (decl, NULL);
31844 tree ret = get_identifier (assembler_name);
31845 XDELETEVEC (attr_str);
31846 XDELETEVEC (assembler_name);
31847 return ret;
31850 /* This function returns true if FN1 and FN2 are versions of the same function,
31851 that is, the target strings of the function decls are different. This assumes
31852 that FN1 and FN2 have the same signature. */
31854 static bool
31855 ix86_function_versions (tree fn1, tree fn2)
31857 tree attr1, attr2;
31858 char *target1, *target2;
31859 bool result;
31861 if (TREE_CODE (fn1) != FUNCTION_DECL
31862 || TREE_CODE (fn2) != FUNCTION_DECL)
31863 return false;
31865 attr1 = lookup_attribute ("target", DECL_ATTRIBUTES (fn1));
31866 attr2 = lookup_attribute ("target", DECL_ATTRIBUTES (fn2));
31868 /* At least one function decl should have the target attribute specified. */
31869 if (attr1 == NULL_TREE && attr2 == NULL_TREE)
31870 return false;
31872 /* Diagnose missing target attribute if one of the decls is already
31873 multi-versioned. */
31874 if (attr1 == NULL_TREE || attr2 == NULL_TREE)
31876 if (DECL_FUNCTION_VERSIONED (fn1) || DECL_FUNCTION_VERSIONED (fn2))
31878 if (attr2 != NULL_TREE)
31880 tree tem = fn1;
31881 fn1 = fn2;
31882 fn2 = tem;
31883 attr1 = attr2;
31885 error_at (DECL_SOURCE_LOCATION (fn2),
31886 "missing %<target%> attribute for multi-versioned %D",
31887 fn2);
31888 inform (DECL_SOURCE_LOCATION (fn1),
31889 "previous declaration of %D", fn1);
31890 /* Prevent diagnosing of the same error multiple times. */
31891 DECL_ATTRIBUTES (fn2)
31892 = tree_cons (get_identifier ("target"),
31893 copy_node (TREE_VALUE (attr1)),
31894 DECL_ATTRIBUTES (fn2));
31896 return false;
31899 target1 = sorted_attr_string (TREE_VALUE (attr1));
31900 target2 = sorted_attr_string (TREE_VALUE (attr2));
31902 /* The sorted target strings must be different for fn1 and fn2
31903 to be versions. */
31904 if (strcmp (target1, target2) == 0)
31905 result = false;
31906 else
31907 result = true;
31909 XDELETEVEC (target1);
31910 XDELETEVEC (target2);
31912 return result;
31915 static tree
31916 ix86_mangle_decl_assembler_name (tree decl, tree id)
31918 /* For function version, add the target suffix to the assembler name. */
31919 if (TREE_CODE (decl) == FUNCTION_DECL
31920 && DECL_FUNCTION_VERSIONED (decl))
31921 id = ix86_mangle_function_version_assembler_name (decl, id);
31922 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
31923 id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
31924 #endif
31926 return id;
31929 /* Return a new name by appending SUFFIX to the DECL name. If make_unique
31930 is true, append the full path name of the source file. */
31932 static char *
31933 make_name (tree decl, const char *suffix, bool make_unique)
31935 char *global_var_name;
31936 int name_len;
31937 const char *name;
31938 const char *unique_name = NULL;
31940 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
31942 /* Get a unique name that can be used globally without any chances
31943 of collision at link time. */
31944 if (make_unique)
31945 unique_name = IDENTIFIER_POINTER (get_file_function_name ("\0"));
31947 name_len = strlen (name) + strlen (suffix) + 2;
31949 if (make_unique)
31950 name_len += strlen (unique_name) + 1;
31951 global_var_name = XNEWVEC (char, name_len);
31953 /* Use '.' to concatenate names as it is demangler friendly. */
31954 if (make_unique)
31955 snprintf (global_var_name, name_len, "%s.%s.%s", name, unique_name,
31956 suffix);
31957 else
31958 snprintf (global_var_name, name_len, "%s.%s", name, suffix);
31960 return global_var_name;
31963 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
31965 /* Make a dispatcher declaration for the multi-versioned function DECL.
31966 Calls to DECL function will be replaced with calls to the dispatcher
31967 by the front-end. Return the decl created. */
31969 static tree
31970 make_dispatcher_decl (const tree decl)
31972 tree func_decl;
31973 char *func_name;
31974 tree fn_type, func_type;
31975 bool is_uniq = false;
31977 if (TREE_PUBLIC (decl) == 0)
31978 is_uniq = true;
31980 func_name = make_name (decl, "ifunc", is_uniq);
31982 fn_type = TREE_TYPE (decl);
31983 func_type = build_function_type (TREE_TYPE (fn_type),
31984 TYPE_ARG_TYPES (fn_type));
31986 func_decl = build_fn_decl (func_name, func_type);
31987 XDELETEVEC (func_name);
31988 TREE_USED (func_decl) = 1;
31989 DECL_CONTEXT (func_decl) = NULL_TREE;
31990 DECL_INITIAL (func_decl) = error_mark_node;
31991 DECL_ARTIFICIAL (func_decl) = 1;
31992 /* Mark this function as external; the resolver will flip it again
31993 if it gets generated. */
31994 DECL_EXTERNAL (func_decl) = 1;
31995 /* IFUNCs have to be externally visible, so make this public. */
31996 TREE_PUBLIC (func_decl) = 1;
31998 return func_decl;
32001 #endif
32003 /* Returns true if DECL is multi-versioned and is the default function,
32004 that is, it is not tagged with a target-specific optimization. */
32006 static bool
32007 is_function_default_version (const tree decl)
32009 if (TREE_CODE (decl) != FUNCTION_DECL
32010 || !DECL_FUNCTION_VERSIONED (decl))
32011 return false;
32012 tree attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
32013 gcc_assert (attr);
32014 attr = TREE_VALUE (TREE_VALUE (attr));
32015 return (TREE_CODE (attr) == STRING_CST
32016 && strcmp (TREE_STRING_POINTER (attr), "default") == 0);
32019 /* Make a dispatcher declaration for the multi-versioned function DECL.
32020 Calls to DECL function will be replaced with calls to the dispatcher
32021 by the front-end. Returns the decl of the dispatcher function. */
32023 static tree
32024 ix86_get_function_versions_dispatcher (void *decl)
32026 tree fn = (tree) decl;
32027 struct cgraph_node *node = NULL;
32028 struct cgraph_node *default_node = NULL;
32029 struct cgraph_function_version_info *node_v = NULL;
32030 struct cgraph_function_version_info *first_v = NULL;
32032 tree dispatch_decl = NULL;
32034 struct cgraph_function_version_info *default_version_info = NULL;
32036 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
32038 node = cgraph_get_node (fn);
32039 gcc_assert (node != NULL);
32041 node_v = get_cgraph_node_version (node);
32042 gcc_assert (node_v != NULL);
32044 if (node_v->dispatcher_resolver != NULL)
32045 return node_v->dispatcher_resolver;
32047 /* Find the default version and make it the first node. */
32048 first_v = node_v;
32049 /* Go to the beginning of the chain. */
32050 while (first_v->prev != NULL)
32051 first_v = first_v->prev;
32052 default_version_info = first_v;
32053 while (default_version_info != NULL)
32055 if (is_function_default_version
32056 (default_version_info->this_node->decl))
32057 break;
32058 default_version_info = default_version_info->next;
32061 /* If there is no default node, just return NULL. */
32062 if (default_version_info == NULL)
32063 return NULL;
32065 /* Make default info the first node. */
32066 if (first_v != default_version_info)
32068 default_version_info->prev->next = default_version_info->next;
32069 if (default_version_info->next)
32070 default_version_info->next->prev = default_version_info->prev;
32071 first_v->prev = default_version_info;
32072 default_version_info->next = first_v;
32073 default_version_info->prev = NULL;
32076 default_node = default_version_info->this_node;
32078 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
32079 if (targetm.has_ifunc_p ())
32081 struct cgraph_function_version_info *it_v = NULL;
32082 struct cgraph_node *dispatcher_node = NULL;
32083 struct cgraph_function_version_info *dispatcher_version_info = NULL;
32085 /* Right now, the dispatching is done via ifunc. */
32086 dispatch_decl = make_dispatcher_decl (default_node->decl);
32088 dispatcher_node = cgraph_get_create_node (dispatch_decl);
32089 gcc_assert (dispatcher_node != NULL);
32090 dispatcher_node->dispatcher_function = 1;
32091 dispatcher_version_info
32092 = insert_new_cgraph_node_version (dispatcher_node);
32093 dispatcher_version_info->next = default_version_info;
32094 dispatcher_node->definition = 1;
32096 /* Set the dispatcher for all the versions. */
32097 it_v = default_version_info;
32098 while (it_v != NULL)
32100 it_v->dispatcher_resolver = dispatch_decl;
32101 it_v = it_v->next;
32104 else
32105 #endif
32107 error_at (DECL_SOURCE_LOCATION (default_node->decl),
32108 "multiversioning needs ifunc which is not supported "
32109 "on this target");
32112 return dispatch_decl;
32115 /* Makes a function attribute of the form NAME(ARG_NAME) and chains
32116 it to CHAIN. */
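/* For example, make_attribute ("ifunc", "foo.resolver", NULL_TREE)
   builds the tree form of __attribute__ ((ifunc ("foo.resolver")));
   "foo.resolver" is an illustrative name, see make_resolver_func for
   the actual use. */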
32118 static tree
32119 make_attribute (const char *name, const char *arg_name, tree chain)
32121 tree attr_name;
32122 tree attr_arg_name;
32123 tree attr_args;
32124 tree attr;
32126 attr_name = get_identifier (name);
32127 attr_arg_name = build_string (strlen (arg_name), arg_name);
32128 attr_args = tree_cons (NULL_TREE, attr_arg_name, NULL_TREE);
32129 attr = tree_cons (attr_name, attr_args, chain);
32130 return attr;
32133 /* Make the resolver function decl to dispatch the versions of
32134 a multi-versioned function, DEFAULT_DECL. Create an
32135 empty basic block in the resolver and store the pointer in
32136 EMPTY_BB. Return the decl of the resolver function. */
32138 static tree
32139 make_resolver_func (const tree default_decl,
32140 const tree dispatch_decl,
32141 basic_block *empty_bb)
32143 char *resolver_name;
32144 tree decl, type, decl_name, t;
32145 bool is_uniq = false;
32147 /* IFUNCs have to be globally visible. So, if the default_decl is
32148 not, then the name of the IFUNC should be made unique. */
32149 if (TREE_PUBLIC (default_decl) == 0)
32150 is_uniq = true;
32152 /* Append the filename to the resolver function if the versions are
32153 not externally visible. This is because the resolver function has
32154 to be externally visible for the loader to find it. So, appending
32155 the filename will prevent conflicts with a resolver function from
32156 another module which is based on the same version name. */
32157 resolver_name = make_name (default_decl, "resolver", is_uniq);
32159 /* The resolver function should return a (void *). */
32160 type = build_function_type_list (ptr_type_node, NULL_TREE);
32162 decl = build_fn_decl (resolver_name, type);
32163 decl_name = get_identifier (resolver_name);
32164 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
32166 DECL_NAME (decl) = decl_name;
32167 TREE_USED (decl) = 1;
32168 DECL_ARTIFICIAL (decl) = 1;
32169 DECL_IGNORED_P (decl) = 0;
32170 /* IFUNC resolvers have to be externally visible. */
32171 TREE_PUBLIC (decl) = 1;
32172 DECL_UNINLINABLE (decl) = 1;
32174 /* Resolver is not external, body is generated. */
32175 DECL_EXTERNAL (decl) = 0;
32176 DECL_EXTERNAL (dispatch_decl) = 0;
32178 DECL_CONTEXT (decl) = NULL_TREE;
32179 DECL_INITIAL (decl) = make_node (BLOCK);
32180 DECL_STATIC_CONSTRUCTOR (decl) = 0;
32182 if (DECL_COMDAT_GROUP (default_decl)
32183 || TREE_PUBLIC (default_decl))
32185 /* In this case, each translation unit with a call to this
32186 versioned function will emit a resolver. Make it
32187 comdat to keep just one copy. */
32188 DECL_COMDAT (decl) = 1;
32189 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
32191 /* Build result decl and add to function_decl. */
32192 t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
32193 DECL_ARTIFICIAL (t) = 1;
32194 DECL_IGNORED_P (t) = 1;
32195 DECL_RESULT (decl) = t;
32197 gimplify_function_tree (decl);
32198 push_cfun (DECL_STRUCT_FUNCTION (decl));
32199 *empty_bb = init_lowered_empty_function (decl, false);
32201 cgraph_add_new_function (decl, true);
32202 cgraph_call_function_insertion_hooks (cgraph_get_create_node (decl));
32204 pop_cfun ();
32206 gcc_assert (dispatch_decl != NULL);
32207 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
32208 DECL_ATTRIBUTES (dispatch_decl)
32209 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
32211 /* Create the alias for dispatch to resolver here. */
32212 /*cgraph_create_function_alias (dispatch_decl, decl);*/
32213 cgraph_same_body_alias (NULL, dispatch_decl, decl);
32214 XDELETEVEC (resolver_name);
32215 return decl;
32218 /* Generate the dispatching code body to dispatch multi-versioned function
32219 DECL. The target hook is called to process the "target" attributes and
32220 provide the code to dispatch the right function at run-time. NODE points
32221 to the dispatcher decl whose body will be created. */
32223 static tree
32224 ix86_generate_version_dispatcher_body (void *node_p)
32226 tree resolver_decl;
32227 basic_block empty_bb;
32228 tree default_ver_decl;
32229 struct cgraph_node *versn;
32230 struct cgraph_node *node;
32232 struct cgraph_function_version_info *node_version_info = NULL;
32233 struct cgraph_function_version_info *versn_info = NULL;
32235 node = (cgraph_node *)node_p;
32237 node_version_info = get_cgraph_node_version (node);
32238 gcc_assert (node->dispatcher_function
32239 && node_version_info != NULL);
32241 if (node_version_info->dispatcher_resolver)
32242 return node_version_info->dispatcher_resolver;
32244 /* The first version in the chain corresponds to the default version. */
32245 default_ver_decl = node_version_info->next->this_node->decl;
32247 /* node is going to be an alias, so remove the finalized bit. */
32248 node->definition = false;
32250 resolver_decl = make_resolver_func (default_ver_decl,
32251 node->decl, &empty_bb);
32253 node_version_info->dispatcher_resolver = resolver_decl;
32255 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
32257 auto_vec<tree, 2> fn_ver_vec;
32259 for (versn_info = node_version_info->next; versn_info;
32260 versn_info = versn_info->next)
32262 versn = versn_info->this_node;
32263 /* Check for virtual functions here again, as by this time it should
32264 have been determined if this function needs a vtable index or
32265 not. This happens for methods in derived classes that override
32266 virtual methods in base classes but are not explicitly marked as
32267 virtual. */
32268 if (DECL_VINDEX (versn->decl))
32269 sorry ("virtual function multiversioning not supported");
32271 fn_ver_vec.safe_push (versn->decl);
32274 dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
32275 rebuild_cgraph_edges ();
32276 pop_cfun ();
32277 return resolver_decl;
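/* Illustration only (hypothetical names, not part of i386.c):
   dispatch_function_versions, defined earlier in this file, fills
   EMPTY_BB of the resolver with dispatching code.  With versions
   foo.default / foo.sse4_2 / foo.avx2, the generated body is
   conceptually equivalent to:

     void *foo_resolver (void)
     {
       __builtin_cpu_init ();
       if (__builtin_cpu_supports ("avx2"))
         return foo_avx2;
       if (__builtin_cpu_supports ("sse4.2"))
         return foo_sse4_2;
       return foo_default;
     }

   The real body is emitted directly as GIMPLE against the __cpu_model
   data rather than by expanding these builtins, so treat this purely as
   a sketch of the semantics.  */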
32279 /* This builds the processor_model struct type defined in
32280 libgcc/config/i386/cpuinfo.c */
32282 static tree
32283 build_processor_model_struct (void)
32285 const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
32286 "__cpu_features"};
32287 tree field = NULL_TREE, field_chain = NULL_TREE;
32288 int i;
32289 tree type = make_node (RECORD_TYPE);
32291 /* The first 3 fields are unsigned int. */
32292 for (i = 0; i < 3; ++i)
32294 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
32295 get_identifier (field_name[i]), unsigned_type_node);
32296 if (field_chain != NULL_TREE)
32297 DECL_CHAIN (field) = field_chain;
32298 field_chain = field;
32301 /* The last field is an array of unsigned integers of size one. */
32302 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
32303 get_identifier (field_name[3]),
32304 build_array_type (unsigned_type_node,
32305 build_index_type (size_one_node)));
32306 if (field_chain != NULL_TREE)
32307 DECL_CHAIN (field) = field_chain;
32308 field_chain = field;
32310 finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
32311 return type;
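/* The record built above is meant to mirror the struct defined in
   libgcc/config/i386/cpuinfo.c, roughly:

     struct __processor_model
     {
       unsigned int __cpu_vendor;
       unsigned int __cpu_type;
       unsigned int __cpu_subtype;
       unsigned int __cpu_features[1];
     };

   Keeping field order and types in sync with libgcc is what makes the
   folded accesses in fold_builtin_cpu below read the right data.  */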
32314 /* Returns an extern, comdat VAR_DECL of type TYPE and name NAME. */
32316 static tree
32317 make_var_decl (tree type, const char *name)
32319 tree new_decl;
32321 new_decl = build_decl (UNKNOWN_LOCATION,
32322 VAR_DECL,
32323 get_identifier (name),
32324 type);
32326 DECL_EXTERNAL (new_decl) = 1;
32327 TREE_STATIC (new_decl) = 1;
32328 TREE_PUBLIC (new_decl) = 1;
32329 DECL_INITIAL (new_decl) = 0;
32330 DECL_ARTIFICIAL (new_decl) = 0;
32331 DECL_PRESERVE_P (new_decl) = 1;
32333 make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
32334 assemble_variable (new_decl, 0, 0, 0);
32336 return new_decl;
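/* Illustration only: for the single use below this amounts to an extern
   reference (made one-only, as the code above shows) equivalent to

     extern struct __processor_model __cpu_model;

   whose definition and initialization live in libgcc
   (config/i386/cpuinfo.c); DECL_PRESERVE_P keeps the reference from
   being optimized away.  */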
32339 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
32340 into a check against the __cpu_model data defined in libgcc/config/i386/cpuinfo.c. */
32342 static tree
32343 fold_builtin_cpu (tree fndecl, tree *args)
32345 unsigned int i;
32346 enum ix86_builtins fn_code = (enum ix86_builtins)
32347 DECL_FUNCTION_CODE (fndecl);
32348 tree param_string_cst = NULL;
32350 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
32351 enum processor_features
32353 F_CMOV = 0,
32354 F_MMX,
32355 F_POPCNT,
32356 F_SSE,
32357 F_SSE2,
32358 F_SSE3,
32359 F_SSSE3,
32360 F_SSE4_1,
32361 F_SSE4_2,
32362 F_AVX,
32363 F_AVX2,
32364 F_SSE4_A,
32365 F_FMA4,
32366 F_XOP,
32367 F_FMA,
32368 F_MAX
32371 /* These are the values for vendor types and cpu types and subtypes
32372 in cpuinfo.c. Cpu types and subtypes have the corresponding
32373 start value subtracted from them before use. */
32374 enum processor_model
32376 M_INTEL = 1,
32377 M_AMD,
32378 M_CPU_TYPE_START,
32379 M_INTEL_BONNELL,
32380 M_INTEL_CORE2,
32381 M_INTEL_COREI7,
32382 M_AMDFAM10H,
32383 M_AMDFAM15H,
32384 M_INTEL_SILVERMONT,
32385 M_AMD_BTVER1,
32386 M_AMD_BTVER2,
32387 M_CPU_SUBTYPE_START,
32388 M_INTEL_COREI7_NEHALEM,
32389 M_INTEL_COREI7_WESTMERE,
32390 M_INTEL_COREI7_SANDYBRIDGE,
32391 M_AMDFAM10H_BARCELONA,
32392 M_AMDFAM10H_SHANGHAI,
32393 M_AMDFAM10H_ISTANBUL,
32394 M_AMDFAM15H_BDVER1,
32395 M_AMDFAM15H_BDVER2,
32396 M_AMDFAM15H_BDVER3,
32397 M_AMDFAM15H_BDVER4,
32398 M_INTEL_COREI7_IVYBRIDGE,
32399 M_INTEL_COREI7_HASWELL
32402 static struct _arch_names_table
32404 const char *const name;
32405 const enum processor_model model;
32407 const arch_names_table[] =
32409 {"amd", M_AMD},
32410 {"intel", M_INTEL},
32411 {"atom", M_INTEL_BONNELL},
32412 {"slm", M_INTEL_SILVERMONT},
32413 {"core2", M_INTEL_CORE2},
32414 {"corei7", M_INTEL_COREI7},
32415 {"nehalem", M_INTEL_COREI7_NEHALEM},
32416 {"westmere", M_INTEL_COREI7_WESTMERE},
32417 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
32418 {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
32419 {"haswell", M_INTEL_COREI7_HASWELL},
32420 {"bonnell", M_INTEL_BONNELL},
32421 {"silvermont", M_INTEL_SILVERMONT},
32422 {"amdfam10h", M_AMDFAM10H},
32423 {"barcelona", M_AMDFAM10H_BARCELONA},
32424 {"shanghai", M_AMDFAM10H_SHANGHAI},
32425 {"istanbul", M_AMDFAM10H_ISTANBUL},
32426 {"btver1", M_AMD_BTVER1},
32427 {"amdfam15h", M_AMDFAM15H},
32428 {"bdver1", M_AMDFAM15H_BDVER1},
32429 {"bdver2", M_AMDFAM15H_BDVER2},
32430 {"bdver3", M_AMDFAM15H_BDVER3},
32431 {"bdver4", M_AMDFAM15H_BDVER4},
32432 {"btver2", M_AMD_BTVER2},
32435 static struct _isa_names_table
32437 const char *const name;
32438 const enum processor_features feature;
32440 const isa_names_table[] =
32442 {"cmov", F_CMOV},
32443 {"mmx", F_MMX},
32444 {"popcnt", F_POPCNT},
32445 {"sse", F_SSE},
32446 {"sse2", F_SSE2},
32447 {"sse3", F_SSE3},
32448 {"ssse3", F_SSSE3},
32449 {"sse4a", F_SSE4_A},
32450 {"sse4.1", F_SSE4_1},
32451 {"sse4.2", F_SSE4_2},
32452 {"avx", F_AVX},
32453 {"fma4", F_FMA4},
32454 {"xop", F_XOP},
32455 {"fma", F_FMA},
32456 {"avx2", F_AVX2}
32459 tree __processor_model_type = build_processor_model_struct ();
32460 tree __cpu_model_var = make_var_decl (__processor_model_type,
32461 "__cpu_model");
32464 varpool_add_new_variable (__cpu_model_var);
32466 gcc_assert ((args != NULL) && (*args != NULL));
32468 param_string_cst = *args;
32469 while (param_string_cst
32470 && TREE_CODE (param_string_cst) != STRING_CST)
32472 /* *args must be an expr that can contain other EXPRs leading to a
32473 STRING_CST. */
32474 if (!EXPR_P (param_string_cst))
32476 error ("parameter to builtin must be a string constant or literal");
32477 return integer_zero_node;
32479 param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
32482 gcc_assert (param_string_cst);
32484 if (fn_code == IX86_BUILTIN_CPU_IS)
32486 tree ref;
32487 tree field;
32488 tree final;
32490 unsigned int field_val = 0;
32491 unsigned int NUM_ARCH_NAMES
32492 = sizeof (arch_names_table) / sizeof (struct _arch_names_table);
32494 for (i = 0; i < NUM_ARCH_NAMES; i++)
32495 if (strcmp (arch_names_table[i].name,
32496 TREE_STRING_POINTER (param_string_cst)) == 0)
32497 break;
32499 if (i == NUM_ARCH_NAMES)
32501 error ("parameter to builtin not valid: %s",
32502 TREE_STRING_POINTER (param_string_cst));
32503 return integer_zero_node;
32506 field = TYPE_FIELDS (__processor_model_type);
32507 field_val = arch_names_table[i].model;
32509 /* CPU types are stored in the next field. */
32510 if (field_val > M_CPU_TYPE_START
32511 && field_val < M_CPU_SUBTYPE_START)
32513 field = DECL_CHAIN (field);
32514 field_val -= M_CPU_TYPE_START;
32517 /* CPU subtypes are stored in the next field. */
32518 if (field_val > M_CPU_SUBTYPE_START)
32520 field = DECL_CHAIN (DECL_CHAIN (field));
32521 field_val -= M_CPU_SUBTYPE_START;
32524 /* Get the appropriate field in __cpu_model. */
32525 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
32526 field, NULL_TREE);
32528 /* Check the value. */
32529 final = build2 (EQ_EXPR, unsigned_type_node, ref,
32530 build_int_cstu (unsigned_type_node, field_val));
32531 return build1 (CONVERT_EXPR, integer_type_node, final);
32533 else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
32535 tree ref;
32536 tree array_elt;
32537 tree field;
32538 tree final;
32540 unsigned int field_val = 0;
32541 unsigned int NUM_ISA_NAMES
32542 = sizeof (isa_names_table) / sizeof (struct _isa_names_table);
32544 for (i = 0; i < NUM_ISA_NAMES; i++)
32545 if (strcmp (isa_names_table[i].name,
32546 TREE_STRING_POINTER (param_string_cst)) == 0)
32547 break;
32549 if (i == NUM_ISA_NAMES)
32551 error ("parameter to builtin not valid: %s",
32552 TREE_STRING_POINTER (param_string_cst));
32553 return integer_zero_node;
32556 field = TYPE_FIELDS (__processor_model_type);
32557 /* Get the last field, which is __cpu_features. */
32558 while (DECL_CHAIN (field))
32559 field = DECL_CHAIN (field);
32561 /* Get the appropriate field: __cpu_model.__cpu_features */
32562 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
32563 field, NULL_TREE);
32565 /* Access the 0th element of __cpu_features array. */
32566 array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
32567 integer_zero_node, NULL_TREE, NULL_TREE);
32569 field_val = (1 << isa_names_table[i].feature);
32570 /* Return __cpu_model.__cpu_features[0] & field_val */
32571 final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
32572 build_int_cstu (unsigned_type_node, field_val));
32573 return build1 (CONVERT_EXPR, integer_type_node, final);
32575 gcc_unreachable ();
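/* Illustration only: the two folds above reduce the builtins to plain
   data accesses.  Assuming the cpuinfo.c layout shown earlier, they
   behave roughly like:

     // __builtin_cpu_is ("amd")
     (int) (__cpu_model.__cpu_vendor == M_AMD)

     // __builtin_cpu_supports ("avx2")
     (int) (__cpu_model.__cpu_features[0] & (1 << F_AVX2))

   For CPU types and subtypes the field and the constant are first
   adjusted by M_CPU_TYPE_START / M_CPU_SUBTYPE_START, as the code
   above does.  */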
32578 static tree
32579 ix86_fold_builtin (tree fndecl, int n_args,
32580 tree *args, bool ignore ATTRIBUTE_UNUSED)
32582 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
32584 enum ix86_builtins fn_code = (enum ix86_builtins)
32585 DECL_FUNCTION_CODE (fndecl);
32586 if (fn_code == IX86_BUILTIN_CPU_IS
32587 || fn_code == IX86_BUILTIN_CPU_SUPPORTS)
32589 gcc_assert (n_args == 1);
32590 return fold_builtin_cpu (fndecl, args);
32594 #ifdef SUBTARGET_FOLD_BUILTIN
32595 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
32596 #endif
32598 return NULL_TREE;
32601 /* Make builtins to detect cpu type and features supported. NAME is
32602 the builtin name, CODE is the builtin code, and FTYPE is the function
32603 type of the builtin. */
32605 static void
32606 make_cpu_type_builtin (const char* name, int code,
32607 enum ix86_builtin_func_type ftype, bool is_const)
32609 tree decl;
32610 tree type;
32612 type = ix86_get_builtin_func_type (ftype);
32613 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
32614 NULL, NULL_TREE);
32615 gcc_assert (decl != NULL_TREE);
32616 ix86_builtins[(int) code] = decl;
32617 TREE_READONLY (decl) = is_const;
32620 /* Make builtins to get CPU type and features supported. The created
32621 builtins are:
32623 __builtin_cpu_init (), to detect cpu type and features,
32624 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
32625 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
32628 static void
32629 ix86_init_platform_type_builtins (void)
32631 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
32632 INT_FTYPE_VOID, false);
32633 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
32634 INT_FTYPE_PCCHAR, true);
32635 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
32636 INT_FTYPE_PCCHAR, true);
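/* Illustration only (hypothetical pick_impl, not part of i386.c): from
   user code the three builtins registered above are used like this, with
   names taken from the arch_names_table / isa_names_table above:

     int
     pick_impl (void)
     {
       __builtin_cpu_init ();
       if (__builtin_cpu_is ("corei7"))
         return 1;
       if (__builtin_cpu_supports ("avx2"))
         return 2;
       return 0;
     }

   Per the GCC manual, the explicit __builtin_cpu_init call is only
   required when the checks may run before the normal constructors,
   e.g. inside an ifunc resolver.  */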
32639 /* Internal method for ix86_init_builtins. */
32641 static void
32642 ix86_init_builtins_va_builtins_abi (void)
32644 tree ms_va_ref, sysv_va_ref;
32645 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
32646 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
32647 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
32648 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
32650 if (!TARGET_64BIT)
32651 return;
32652 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
32653 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
32654 ms_va_ref = build_reference_type (ms_va_list_type_node);
32655 sysv_va_ref =
32656 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
32658 fnvoid_va_end_ms =
32659 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
32660 fnvoid_va_start_ms =
32661 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
32662 fnvoid_va_end_sysv =
32663 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
32664 fnvoid_va_start_sysv =
32665 build_varargs_function_type_list (void_type_node, sysv_va_ref,
32666 NULL_TREE);
32667 fnvoid_va_copy_ms =
32668 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
32669 NULL_TREE);
32670 fnvoid_va_copy_sysv =
32671 build_function_type_list (void_type_node, sysv_va_ref,
32672 sysv_va_ref, NULL_TREE);
32674 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
32675 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
32676 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
32677 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
32678 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
32679 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
32680 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
32681 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32682 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
32683 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32684 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
32685 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
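/* A sketch only (hypothetical sum_ms, not part of i386.c): the builtins
   registered above give explicit access to each ABI's variable-argument
   handling on x86-64.  The MS-ABI flavour looks roughly like:

     __attribute__ ((ms_abi)) int
     sum_ms (int n, ...)
     {
       __builtin_ms_va_list ap;
       int s = 0;
       __builtin_ms_va_start (ap, n);
       while (n-- > 0)
         s += __builtin_va_arg (ap, int);
       __builtin_ms_va_end (ap);
       return s;
     }

   Ordinary <stdarg.h> va_start/va_end inside an ms_abi or sysv_abi
   function is handled according to that function's ABI; the explicit
   spellings matter mainly for cross-ABI code.  */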
32688 static void
32689 ix86_init_builtin_types (void)
32691 tree float128_type_node, float80_type_node;
32693 /* The __float80 type. */
32694 float80_type_node = long_double_type_node;
32695 if (TYPE_MODE (float80_type_node) != XFmode)
32697 /* The __float80 type. */
32698 float80_type_node = make_node (REAL_TYPE);
32700 TYPE_PRECISION (float80_type_node) = 80;
32701 layout_type (float80_type_node);
32703 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
32705 /* The __float128 type. */
32706 float128_type_node = make_node (REAL_TYPE);
32707 TYPE_PRECISION (float128_type_node) = 128;
32708 layout_type (float128_type_node);
32709 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
32711 /* This macro is built by i386-builtin-types.awk. */
32712 DEFINE_BUILTIN_PRIMITIVE_TYPES;
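/* Illustration only: after the registrations above, both extended float
   types are usable from C on x86, e.g.:

     __float80  e = 1.0w;   // XFmode; usually the same type as long double
     __float128 q = 1.0q;   // TFmode

   The w/W and q/Q literal suffixes are the GCC extensions documented for
   these types; plain conversions from double work as well.  */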
32715 static void
32716 ix86_init_builtins (void)
32718 tree t;
32720 ix86_init_builtin_types ();
32722 /* Builtins to get CPU type and features. */
32723 ix86_init_platform_type_builtins ();
32725 /* TFmode support builtins. */
32726 def_builtin_const (0, "__builtin_infq",
32727 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
32728 def_builtin_const (0, "__builtin_huge_valq",
32729 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
32731 /* We will expand them to a normal call if SSE isn't available, since
32732 they are used by libgcc. */
32733 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
32734 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
32735 BUILT_IN_MD, "__fabstf2", NULL_TREE);
32736 TREE_READONLY (t) = 1;
32737 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
32739 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
32740 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
32741 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
32742 TREE_READONLY (t) = 1;
32743 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
32745 ix86_init_tm_builtins ();
32746 ix86_init_mmx_sse_builtins ();
32748 if (TARGET_LP64)
32749 ix86_init_builtins_va_builtins_abi ();
32751 #ifdef SUBTARGET_INIT_BUILTINS
32752 SUBTARGET_INIT_BUILTINS;
32753 #endif
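/* Illustration only (hypothetical clamp_sign, not part of i386.c): the
   TFmode builtins defined above give user code infinity/abs/copysign for
   __float128, e.g.:

     __float128
     clamp_sign (__float128 x, __float128 s)
     {
       if (__builtin_fabsq (x) == __builtin_infq ())
         return __builtin_copysignq (__builtin_huge_valq (), s);
       return __builtin_copysignq (x, s);
     }

   As the comment above notes, fabsq/copysignq expand to calls to the
   libgcc routines named in their definitions (__fabstf2, __copysigntf3)
   when SSE is unavailable.  */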
32756 /* Return the ix86 builtin for CODE. */
32758 static tree
32759 ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
32761 if (code >= IX86_BUILTIN_MAX)
32762 return error_mark_node;
32764 return ix86_builtins[code];
32767 /* Errors in the source file can cause expand_expr to return const0_rtx
32768 where we expect a vector. To avoid crashing, use one of the vector
32769 clear instructions. */
32770 static rtx
32771 safe_vector_operand (rtx x, enum machine_mode mode)
32773 if (x == const0_rtx)
32774 x = CONST0_RTX (mode);
32775 return x;
32778 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
32780 static rtx
32781 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
32783 rtx pat;
32784 tree arg0 = CALL_EXPR_ARG (exp, 0);
32785 tree arg1 = CALL_EXPR_ARG (exp, 1);
32786 rtx op0 = expand_normal (arg0);
32787 rtx op1 = expand_normal (arg1);
32788 enum machine_mode tmode = insn_data[icode].operand[0].mode;
32789 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
32790 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
32792 if (VECTOR_MODE_P (mode0))
32793 op0 = safe_vector_operand (op0, mode0);
32794 if (VECTOR_MODE_P (mode1))
32795 op1 = safe_vector_operand (op1, mode1);
32797 if (optimize || !target
32798 || GET_MODE (target) != tmode
32799 || !insn_data[icode].operand[0].predicate (target, tmode))
32800 target = gen_reg_rtx (tmode);
32802 if (GET_MODE (op1) == SImode && mode1 == TImode)
32804 rtx x = gen_reg_rtx (V4SImode);
32805 emit_insn (gen_sse2_loadd (x, op1));
32806 op1 = gen_lowpart (TImode, x);
32809 if (!insn_data[icode].operand[1].predicate (op0, mode0))
32810 op0 = copy_to_mode_reg (mode0, op0);
32811 if (!insn_data[icode].operand[2].predicate (op1, mode1))
32812 op1 = copy_to_mode_reg (mode1, op1);
32814 pat = GEN_FCN (icode) (target, op0, op1);
32815 if (! pat)
32816 return 0;
32818 emit_insn (pat);
32820 return target;
32823 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
32825 static rtx
32826 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
32827 enum ix86_builtin_func_type m_type,
32828 enum rtx_code sub_code)
32830 rtx pat;
32831 int i;
32832 int nargs;
32833 bool comparison_p = false;
32834 bool tf_p = false;
32835 bool last_arg_constant = false;
32836 int num_memory = 0;
32837 struct {
32838 rtx op;
32839 enum machine_mode mode;
32840 } args[4];
32842 enum machine_mode tmode = insn_data[icode].operand[0].mode;
32844 switch (m_type)
32846 case MULTI_ARG_4_DF2_DI_I:
32847 case MULTI_ARG_4_DF2_DI_I1:
32848 case MULTI_ARG_4_SF2_SI_I:
32849 case MULTI_ARG_4_SF2_SI_I1:
32850 nargs = 4;
32851 last_arg_constant = true;
32852 break;
32854 case MULTI_ARG_3_SF:
32855 case MULTI_ARG_3_DF:
32856 case MULTI_ARG_3_SF2:
32857 case MULTI_ARG_3_DF2:
32858 case MULTI_ARG_3_DI:
32859 case MULTI_ARG_3_SI:
32860 case MULTI_ARG_3_SI_DI:
32861 case MULTI_ARG_3_HI:
32862 case MULTI_ARG_3_HI_SI:
32863 case MULTI_ARG_3_QI:
32864 case MULTI_ARG_3_DI2:
32865 case MULTI_ARG_3_SI2:
32866 case MULTI_ARG_3_HI2:
32867 case MULTI_ARG_3_QI2:
32868 nargs = 3;
32869 break;
32871 case MULTI_ARG_2_SF:
32872 case MULTI_ARG_2_DF:
32873 case MULTI_ARG_2_DI:
32874 case MULTI_ARG_2_SI:
32875 case MULTI_ARG_2_HI:
32876 case MULTI_ARG_2_QI:
32877 nargs = 2;
32878 break;
32880 case MULTI_ARG_2_DI_IMM:
32881 case MULTI_ARG_2_SI_IMM:
32882 case MULTI_ARG_2_HI_IMM:
32883 case MULTI_ARG_2_QI_IMM:
32884 nargs = 2;
32885 last_arg_constant = true;
32886 break;
32888 case MULTI_ARG_1_SF:
32889 case MULTI_ARG_1_DF:
32890 case MULTI_ARG_1_SF2:
32891 case MULTI_ARG_1_DF2:
32892 case MULTI_ARG_1_DI:
32893 case MULTI_ARG_1_SI:
32894 case MULTI_ARG_1_HI:
32895 case MULTI_ARG_1_QI:
32896 case MULTI_ARG_1_SI_DI:
32897 case MULTI_ARG_1_HI_DI:
32898 case MULTI_ARG_1_HI_SI:
32899 case MULTI_ARG_1_QI_DI:
32900 case MULTI_ARG_1_QI_SI:
32901 case MULTI_ARG_1_QI_HI:
32902 nargs = 1;
32903 break;
32905 case MULTI_ARG_2_DI_CMP:
32906 case MULTI_ARG_2_SI_CMP:
32907 case MULTI_ARG_2_HI_CMP:
32908 case MULTI_ARG_2_QI_CMP:
32909 nargs = 2;
32910 comparison_p = true;
32911 break;
32913 case MULTI_ARG_2_SF_TF:
32914 case MULTI_ARG_2_DF_TF:
32915 case MULTI_ARG_2_DI_TF:
32916 case MULTI_ARG_2_SI_TF:
32917 case MULTI_ARG_2_HI_TF:
32918 case MULTI_ARG_2_QI_TF:
32919 nargs = 2;
32920 tf_p = true;
32921 break;
32923 default:
32924 gcc_unreachable ();
32927 if (optimize || !target
32928 || GET_MODE (target) != tmode
32929 || !insn_data[icode].operand[0].predicate (target, tmode))
32930 target = gen_reg_rtx (tmode);
32932 gcc_assert (nargs <= 4);
32934 for (i = 0; i < nargs; i++)
32936 tree arg = CALL_EXPR_ARG (exp, i);
32937 rtx op = expand_normal (arg);
32938 int adjust = (comparison_p) ? 1 : 0;
32939 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
32941 if (last_arg_constant && i == nargs - 1)
32943 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
32945 enum insn_code new_icode = icode;
32946 switch (icode)
32948 case CODE_FOR_xop_vpermil2v2df3:
32949 case CODE_FOR_xop_vpermil2v4sf3:
32950 case CODE_FOR_xop_vpermil2v4df3:
32951 case CODE_FOR_xop_vpermil2v8sf3:
32952 error ("the last argument must be a 2-bit immediate");
32953 return gen_reg_rtx (tmode);
32954 case CODE_FOR_xop_rotlv2di3:
32955 new_icode = CODE_FOR_rotlv2di3;
32956 goto xop_rotl;
32957 case CODE_FOR_xop_rotlv4si3:
32958 new_icode = CODE_FOR_rotlv4si3;
32959 goto xop_rotl;
32960 case CODE_FOR_xop_rotlv8hi3:
32961 new_icode = CODE_FOR_rotlv8hi3;
32962 goto xop_rotl;
32963 case CODE_FOR_xop_rotlv16qi3:
32964 new_icode = CODE_FOR_rotlv16qi3;
32965 xop_rotl:
32966 if (CONST_INT_P (op))
32968 int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
32969 op = GEN_INT (INTVAL (op) & mask);
32970 gcc_checking_assert
32971 (insn_data[icode].operand[i + 1].predicate (op, mode));
32973 else
32975 gcc_checking_assert
32976 (nargs == 2
32977 && insn_data[new_icode].operand[0].mode == tmode
32978 && insn_data[new_icode].operand[1].mode == tmode
32979 && insn_data[new_icode].operand[2].mode == mode
32980 && insn_data[new_icode].operand[0].predicate
32981 == insn_data[icode].operand[0].predicate
32982 && insn_data[new_icode].operand[1].predicate
32983 == insn_data[icode].operand[1].predicate);
32984 icode = new_icode;
32985 goto non_constant;
32987 break;
32988 default:
32989 gcc_unreachable ();
32993 else
32995 non_constant:
32996 if (VECTOR_MODE_P (mode))
32997 op = safe_vector_operand (op, mode);
32999 /* If we aren't optimizing, only allow one memory operand to be
33000 generated. */
33001 if (memory_operand (op, mode))
33002 num_memory++;
33004 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
33006 if (optimize
33007 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
33008 || num_memory > 1)
33009 op = force_reg (mode, op);
33012 args[i].op = op;
33013 args[i].mode = mode;
33016 switch (nargs)
33018 case 1:
33019 pat = GEN_FCN (icode) (target, args[0].op);
33020 break;
33022 case 2:
33023 if (tf_p)
33024 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
33025 GEN_INT ((int)sub_code));
33026 else if (! comparison_p)
33027 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
33028 else
33030 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
33031 args[0].op,
33032 args[1].op);
33034 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
33036 break;
33038 case 3:
33039 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
33040 break;
33042 case 4:
33043 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
33044 break;
33046 default:
33047 gcc_unreachable ();
33050 if (! pat)
33051 return 0;
33053 emit_insn (pat);
33054 return target;
33057 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
33058 insns with vec_merge. */
33060 static rtx
33061 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
33062 rtx target)
33064 rtx pat;
33065 tree arg0 = CALL_EXPR_ARG (exp, 0);
33066 rtx op1, op0 = expand_normal (arg0);
33067 enum machine_mode tmode = insn_data[icode].operand[0].mode;
33068 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
33070 if (optimize || !target
33071 || GET_MODE (target) != tmode
33072 || !insn_data[icode].operand[0].predicate (target, tmode))
33073 target = gen_reg_rtx (tmode);
33075 if (VECTOR_MODE_P (mode0))
33076 op0 = safe_vector_operand (op0, mode0);
33078 if ((optimize && !register_operand (op0, mode0))
33079 || !insn_data[icode].operand[1].predicate (op0, mode0))
33080 op0 = copy_to_mode_reg (mode0, op0);
33082 op1 = op0;
33083 if (!insn_data[icode].operand[2].predicate (op1, mode0))
33084 op1 = copy_to_mode_reg (mode0, op1);
33086 pat = GEN_FCN (icode) (target, op0, op1);
33087 if (! pat)
33088 return 0;
33089 emit_insn (pat);
33090 return target;
33093 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
33095 static rtx
33096 ix86_expand_sse_compare (const struct builtin_description *d,
33097 tree exp, rtx target, bool swap)
33099 rtx pat;
33100 tree arg0 = CALL_EXPR_ARG (exp, 0);
33101 tree arg1 = CALL_EXPR_ARG (exp, 1);
33102 rtx op0 = expand_normal (arg0);
33103 rtx op1 = expand_normal (arg1);
33104 rtx op2;
33105 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33106 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33107 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
33108 enum rtx_code comparison = d->comparison;
33110 if (VECTOR_MODE_P (mode0))
33111 op0 = safe_vector_operand (op0, mode0);
33112 if (VECTOR_MODE_P (mode1))
33113 op1 = safe_vector_operand (op1, mode1);
33115 /* Swap operands if we have a comparison that isn't available in
33116 hardware. */
33117 if (swap)
33119 rtx tmp = gen_reg_rtx (mode1);
33120 emit_move_insn (tmp, op1);
33121 op1 = op0;
33122 op0 = tmp;
33125 if (optimize || !target
33126 || GET_MODE (target) != tmode
33127 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33128 target = gen_reg_rtx (tmode);
33130 if ((optimize && !register_operand (op0, mode0))
33131 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
33132 op0 = copy_to_mode_reg (mode0, op0);
33133 if ((optimize && !register_operand (op1, mode1))
33134 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
33135 op1 = copy_to_mode_reg (mode1, op1);
33137 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
33138 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
33139 if (! pat)
33140 return 0;
33141 emit_insn (pat);
33142 return target;
33145 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
33147 static rtx
33148 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
33149 rtx target)
33151 rtx pat;
33152 tree arg0 = CALL_EXPR_ARG (exp, 0);
33153 tree arg1 = CALL_EXPR_ARG (exp, 1);
33154 rtx op0 = expand_normal (arg0);
33155 rtx op1 = expand_normal (arg1);
33156 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
33157 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
33158 enum rtx_code comparison = d->comparison;
33160 if (VECTOR_MODE_P (mode0))
33161 op0 = safe_vector_operand (op0, mode0);
33162 if (VECTOR_MODE_P (mode1))
33163 op1 = safe_vector_operand (op1, mode1);
33165 /* Swap operands if we have a comparison that isn't available in
33166 hardware. */
33167 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
33169 rtx tmp = op1;
33170 op1 = op0;
33171 op0 = tmp;
33174 target = gen_reg_rtx (SImode);
33175 emit_move_insn (target, const0_rtx);
33176 target = gen_rtx_SUBREG (QImode, target, 0);
33178 if ((optimize && !register_operand (op0, mode0))
33179 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33180 op0 = copy_to_mode_reg (mode0, op0);
33181 if ((optimize && !register_operand (op1, mode1))
33182 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33183 op1 = copy_to_mode_reg (mode1, op1);
33185 pat = GEN_FCN (d->icode) (op0, op1);
33186 if (! pat)
33187 return 0;
33188 emit_insn (pat);
33189 emit_insn (gen_rtx_SET (VOIDmode,
33190 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33191 gen_rtx_fmt_ee (comparison, QImode,
33192 SET_DEST (pat),
33193 const0_rtx)));
33195 return SUBREG_REG (target);
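/* Illustration only: this expander is behind the scalar
   compare-and-set-EFLAGS intrinsics (comiss/comisd and friends); for
   example <xmmintrin.h>'s

     int eq = _mm_comieq_ss (a, b);   // __builtin_ia32_comieq

   comes through here.  The emitted sequence is the comparison insn
   followed by a setcc of the low byte of an SImode pseudo via
   STRICT_LOW_PART, as the code above builds it.  */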
33198 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
33200 static rtx
33201 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
33202 rtx target)
33204 rtx pat;
33205 tree arg0 = CALL_EXPR_ARG (exp, 0);
33206 rtx op1, op0 = expand_normal (arg0);
33207 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33208 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33210 if (optimize || target == 0
33211 || GET_MODE (target) != tmode
33212 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33213 target = gen_reg_rtx (tmode);
33215 if (VECTOR_MODE_P (mode0))
33216 op0 = safe_vector_operand (op0, mode0);
33218 if ((optimize && !register_operand (op0, mode0))
33219 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33220 op0 = copy_to_mode_reg (mode0, op0);
33222 op1 = GEN_INT (d->comparison);
33224 pat = GEN_FCN (d->icode) (target, op0, op1);
33225 if (! pat)
33226 return 0;
33227 emit_insn (pat);
33228 return target;
33231 static rtx
33232 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
33233 tree exp, rtx target)
33235 rtx pat;
33236 tree arg0 = CALL_EXPR_ARG (exp, 0);
33237 tree arg1 = CALL_EXPR_ARG (exp, 1);
33238 rtx op0 = expand_normal (arg0);
33239 rtx op1 = expand_normal (arg1);
33240 rtx op2;
33241 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33242 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33243 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
33245 if (optimize || target == 0
33246 || GET_MODE (target) != tmode
33247 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33248 target = gen_reg_rtx (tmode);
33250 op0 = safe_vector_operand (op0, mode0);
33251 op1 = safe_vector_operand (op1, mode1);
33253 if ((optimize && !register_operand (op0, mode0))
33254 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33255 op0 = copy_to_mode_reg (mode0, op0);
33256 if ((optimize && !register_operand (op1, mode1))
33257 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33258 op1 = copy_to_mode_reg (mode1, op1);
33260 op2 = GEN_INT (d->comparison);
33262 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
33263 if (! pat)
33264 return 0;
33265 emit_insn (pat);
33266 return target;
33269 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
33271 static rtx
33272 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
33273 rtx target)
33275 rtx pat;
33276 tree arg0 = CALL_EXPR_ARG (exp, 0);
33277 tree arg1 = CALL_EXPR_ARG (exp, 1);
33278 rtx op0 = expand_normal (arg0);
33279 rtx op1 = expand_normal (arg1);
33280 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
33281 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
33282 enum rtx_code comparison = d->comparison;
33284 if (VECTOR_MODE_P (mode0))
33285 op0 = safe_vector_operand (op0, mode0);
33286 if (VECTOR_MODE_P (mode1))
33287 op1 = safe_vector_operand (op1, mode1);
33289 target = gen_reg_rtx (SImode);
33290 emit_move_insn (target, const0_rtx);
33291 target = gen_rtx_SUBREG (QImode, target, 0);
33293 if ((optimize && !register_operand (op0, mode0))
33294 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33295 op0 = copy_to_mode_reg (mode0, op0);
33296 if ((optimize && !register_operand (op1, mode1))
33297 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33298 op1 = copy_to_mode_reg (mode1, op1);
33300 pat = GEN_FCN (d->icode) (op0, op1);
33301 if (! pat)
33302 return 0;
33303 emit_insn (pat);
33304 emit_insn (gen_rtx_SET (VOIDmode,
33305 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33306 gen_rtx_fmt_ee (comparison, QImode,
33307 SET_DEST (pat),
33308 const0_rtx)));
33310 return SUBREG_REG (target);
33313 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
33315 static rtx
33316 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
33317 tree exp, rtx target)
33319 rtx pat;
33320 tree arg0 = CALL_EXPR_ARG (exp, 0);
33321 tree arg1 = CALL_EXPR_ARG (exp, 1);
33322 tree arg2 = CALL_EXPR_ARG (exp, 2);
33323 tree arg3 = CALL_EXPR_ARG (exp, 3);
33324 tree arg4 = CALL_EXPR_ARG (exp, 4);
33325 rtx scratch0, scratch1;
33326 rtx op0 = expand_normal (arg0);
33327 rtx op1 = expand_normal (arg1);
33328 rtx op2 = expand_normal (arg2);
33329 rtx op3 = expand_normal (arg3);
33330 rtx op4 = expand_normal (arg4);
33331 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
33333 tmode0 = insn_data[d->icode].operand[0].mode;
33334 tmode1 = insn_data[d->icode].operand[1].mode;
33335 modev2 = insn_data[d->icode].operand[2].mode;
33336 modei3 = insn_data[d->icode].operand[3].mode;
33337 modev4 = insn_data[d->icode].operand[4].mode;
33338 modei5 = insn_data[d->icode].operand[5].mode;
33339 modeimm = insn_data[d->icode].operand[6].mode;
33341 if (VECTOR_MODE_P (modev2))
33342 op0 = safe_vector_operand (op0, modev2);
33343 if (VECTOR_MODE_P (modev4))
33344 op2 = safe_vector_operand (op2, modev4);
33346 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
33347 op0 = copy_to_mode_reg (modev2, op0);
33348 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
33349 op1 = copy_to_mode_reg (modei3, op1);
33350 if ((optimize && !register_operand (op2, modev4))
33351 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
33352 op2 = copy_to_mode_reg (modev4, op2);
33353 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
33354 op3 = copy_to_mode_reg (modei5, op3);
33356 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
33358 error ("the fifth argument must be an 8-bit immediate");
33359 return const0_rtx;
33362 if (d->code == IX86_BUILTIN_PCMPESTRI128)
33364 if (optimize || !target
33365 || GET_MODE (target) != tmode0
33366 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
33367 target = gen_reg_rtx (tmode0);
33369 scratch1 = gen_reg_rtx (tmode1);
33371 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
33373 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
33375 if (optimize || !target
33376 || GET_MODE (target) != tmode1
33377 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
33378 target = gen_reg_rtx (tmode1);
33380 scratch0 = gen_reg_rtx (tmode0);
33382 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
33384 else
33386 gcc_assert (d->flag);
33388 scratch0 = gen_reg_rtx (tmode0);
33389 scratch1 = gen_reg_rtx (tmode1);
33391 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
33394 if (! pat)
33395 return 0;
33397 emit_insn (pat);
33399 if (d->flag)
33401 target = gen_reg_rtx (SImode);
33402 emit_move_insn (target, const0_rtx);
33403 target = gen_rtx_SUBREG (QImode, target, 0);
33405 emit_insn
33406 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33407 gen_rtx_fmt_ee (EQ, QImode,
33408 gen_rtx_REG ((enum machine_mode) d->flag,
33409 FLAGS_REG),
33410 const0_rtx)));
33411 return SUBREG_REG (target);
33413 else
33414 return target;
33418 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
33420 static rtx
33421 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
33422 tree exp, rtx target)
33424 rtx pat;
33425 tree arg0 = CALL_EXPR_ARG (exp, 0);
33426 tree arg1 = CALL_EXPR_ARG (exp, 1);
33427 tree arg2 = CALL_EXPR_ARG (exp, 2);
33428 rtx scratch0, scratch1;
33429 rtx op0 = expand_normal (arg0);
33430 rtx op1 = expand_normal (arg1);
33431 rtx op2 = expand_normal (arg2);
33432 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
33434 tmode0 = insn_data[d->icode].operand[0].mode;
33435 tmode1 = insn_data[d->icode].operand[1].mode;
33436 modev2 = insn_data[d->icode].operand[2].mode;
33437 modev3 = insn_data[d->icode].operand[3].mode;
33438 modeimm = insn_data[d->icode].operand[4].mode;
33440 if (VECTOR_MODE_P (modev2))
33441 op0 = safe_vector_operand (op0, modev2);
33442 if (VECTOR_MODE_P (modev3))
33443 op1 = safe_vector_operand (op1, modev3);
33445 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
33446 op0 = copy_to_mode_reg (modev2, op0);
33447 if ((optimize && !register_operand (op1, modev3))
33448 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
33449 op1 = copy_to_mode_reg (modev3, op1);
33451 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
33453 error ("the third argument must be an 8-bit immediate");
33454 return const0_rtx;
33457 if (d->code == IX86_BUILTIN_PCMPISTRI128)
33459 if (optimize || !target
33460 || GET_MODE (target) != tmode0
33461 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
33462 target = gen_reg_rtx (tmode0);
33464 scratch1 = gen_reg_rtx (tmode1);
33466 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
33468 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
33470 if (optimize || !target
33471 || GET_MODE (target) != tmode1
33472 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
33473 target = gen_reg_rtx (tmode1);
33475 scratch0 = gen_reg_rtx (tmode0);
33477 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
33479 else
33481 gcc_assert (d->flag);
33483 scratch0 = gen_reg_rtx (tmode0);
33484 scratch1 = gen_reg_rtx (tmode1);
33486 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
33489 if (! pat)
33490 return 0;
33492 emit_insn (pat);
33494 if (d->flag)
33496 target = gen_reg_rtx (SImode);
33497 emit_move_insn (target, const0_rtx);
33498 target = gen_rtx_SUBREG (QImode, target, 0);
33500 emit_insn
33501 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33502 gen_rtx_fmt_ee (EQ, QImode,
33503 gen_rtx_REG ((enum machine_mode) d->flag,
33504 FLAGS_REG),
33505 const0_rtx)));
33506 return SUBREG_REG (target);
33508 else
33509 return target;
33512 /* Subroutine of ix86_expand_builtin to take care of insns with a
33513 variable number of operands. */
33515 static rtx
33516 ix86_expand_args_builtin (const struct builtin_description *d,
33517 tree exp, rtx target)
33519 rtx pat, real_target;
33520 unsigned int i, nargs;
33521 unsigned int nargs_constant = 0;
33522 unsigned int mask_pos = 0;
33523 int num_memory = 0;
33524 struct
33526 rtx op;
33527 enum machine_mode mode;
33528 } args[6];
33529 bool last_arg_count = false;
33530 enum insn_code icode = d->icode;
33531 const struct insn_data_d *insn_p = &insn_data[icode];
33532 enum machine_mode tmode = insn_p->operand[0].mode;
33533 enum machine_mode rmode = VOIDmode;
33534 bool swap = false;
33535 enum rtx_code comparison = d->comparison;
33537 switch ((enum ix86_builtin_func_type) d->flag)
33539 case V2DF_FTYPE_V2DF_ROUND:
33540 case V4DF_FTYPE_V4DF_ROUND:
33541 case V4SF_FTYPE_V4SF_ROUND:
33542 case V8SF_FTYPE_V8SF_ROUND:
33543 case V4SI_FTYPE_V4SF_ROUND:
33544 case V8SI_FTYPE_V8SF_ROUND:
33545 return ix86_expand_sse_round (d, exp, target);
33546 case V4SI_FTYPE_V2DF_V2DF_ROUND:
33547 case V8SI_FTYPE_V4DF_V4DF_ROUND:
33548 case V16SI_FTYPE_V8DF_V8DF_ROUND:
33549 return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
33550 case INT_FTYPE_V8SF_V8SF_PTEST:
33551 case INT_FTYPE_V4DI_V4DI_PTEST:
33552 case INT_FTYPE_V4DF_V4DF_PTEST:
33553 case INT_FTYPE_V4SF_V4SF_PTEST:
33554 case INT_FTYPE_V2DI_V2DI_PTEST:
33555 case INT_FTYPE_V2DF_V2DF_PTEST:
33556 return ix86_expand_sse_ptest (d, exp, target);
33557 case FLOAT128_FTYPE_FLOAT128:
33558 case FLOAT_FTYPE_FLOAT:
33559 case INT_FTYPE_INT:
33560 case UINT64_FTYPE_INT:
33561 case UINT16_FTYPE_UINT16:
33562 case INT64_FTYPE_INT64:
33563 case INT64_FTYPE_V4SF:
33564 case INT64_FTYPE_V2DF:
33565 case INT_FTYPE_V16QI:
33566 case INT_FTYPE_V8QI:
33567 case INT_FTYPE_V8SF:
33568 case INT_FTYPE_V4DF:
33569 case INT_FTYPE_V4SF:
33570 case INT_FTYPE_V2DF:
33571 case INT_FTYPE_V32QI:
33572 case V16QI_FTYPE_V16QI:
33573 case V8SI_FTYPE_V8SF:
33574 case V8SI_FTYPE_V4SI:
33575 case V8HI_FTYPE_V8HI:
33576 case V8HI_FTYPE_V16QI:
33577 case V8QI_FTYPE_V8QI:
33578 case V8SF_FTYPE_V8SF:
33579 case V8SF_FTYPE_V8SI:
33580 case V8SF_FTYPE_V4SF:
33581 case V8SF_FTYPE_V8HI:
33582 case V4SI_FTYPE_V4SI:
33583 case V4SI_FTYPE_V16QI:
33584 case V4SI_FTYPE_V4SF:
33585 case V4SI_FTYPE_V8SI:
33586 case V4SI_FTYPE_V8HI:
33587 case V4SI_FTYPE_V4DF:
33588 case V4SI_FTYPE_V2DF:
33589 case V4HI_FTYPE_V4HI:
33590 case V4DF_FTYPE_V4DF:
33591 case V4DF_FTYPE_V4SI:
33592 case V4DF_FTYPE_V4SF:
33593 case V4DF_FTYPE_V2DF:
33594 case V4SF_FTYPE_V4SF:
33595 case V4SF_FTYPE_V4SI:
33596 case V4SF_FTYPE_V8SF:
33597 case V4SF_FTYPE_V4DF:
33598 case V4SF_FTYPE_V8HI:
33599 case V4SF_FTYPE_V2DF:
33600 case V2DI_FTYPE_V2DI:
33601 case V2DI_FTYPE_V16QI:
33602 case V2DI_FTYPE_V8HI:
33603 case V2DI_FTYPE_V4SI:
33604 case V2DF_FTYPE_V2DF:
33605 case V2DF_FTYPE_V4SI:
33606 case V2DF_FTYPE_V4DF:
33607 case V2DF_FTYPE_V4SF:
33608 case V2DF_FTYPE_V2SI:
33609 case V2SI_FTYPE_V2SI:
33610 case V2SI_FTYPE_V4SF:
33611 case V2SI_FTYPE_V2SF:
33612 case V2SI_FTYPE_V2DF:
33613 case V2SF_FTYPE_V2SF:
33614 case V2SF_FTYPE_V2SI:
33615 case V32QI_FTYPE_V32QI:
33616 case V32QI_FTYPE_V16QI:
33617 case V16HI_FTYPE_V16HI:
33618 case V16HI_FTYPE_V8HI:
33619 case V8SI_FTYPE_V8SI:
33620 case V16HI_FTYPE_V16QI:
33621 case V8SI_FTYPE_V16QI:
33622 case V4DI_FTYPE_V16QI:
33623 case V8SI_FTYPE_V8HI:
33624 case V4DI_FTYPE_V8HI:
33625 case V4DI_FTYPE_V4SI:
33626 case V4DI_FTYPE_V2DI:
33627 case HI_FTYPE_HI:
33628 case UINT_FTYPE_V2DF:
33629 case UINT_FTYPE_V4SF:
33630 case UINT64_FTYPE_V2DF:
33631 case UINT64_FTYPE_V4SF:
33632 case V16QI_FTYPE_V8DI:
33633 case V16HI_FTYPE_V16SI:
33634 case V16SI_FTYPE_HI:
33635 case V16SI_FTYPE_V16SI:
33636 case V16SI_FTYPE_INT:
33637 case V16SF_FTYPE_FLOAT:
33638 case V16SF_FTYPE_V4SF:
33639 case V16SF_FTYPE_V16SF:
33640 case V8HI_FTYPE_V8DI:
33641 case V8UHI_FTYPE_V8UHI:
33642 case V8SI_FTYPE_V8DI:
33643 case V8USI_FTYPE_V8USI:
33644 case V8SF_FTYPE_V8DF:
33645 case V8DI_FTYPE_QI:
33646 case V8DI_FTYPE_INT64:
33647 case V8DI_FTYPE_V4DI:
33648 case V8DI_FTYPE_V8DI:
33649 case V8DF_FTYPE_DOUBLE:
33650 case V8DF_FTYPE_V4DF:
33651 case V8DF_FTYPE_V8DF:
33652 case V8DF_FTYPE_V8SI:
33653 nargs = 1;
33654 break;
33655 case V4SF_FTYPE_V4SF_VEC_MERGE:
33656 case V2DF_FTYPE_V2DF_VEC_MERGE:
33657 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
33658 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
33659 case V16QI_FTYPE_V16QI_V16QI:
33660 case V16QI_FTYPE_V8HI_V8HI:
33661 case V16SI_FTYPE_V16SI_V16SI:
33662 case V16SF_FTYPE_V16SF_V16SF:
33663 case V16SF_FTYPE_V16SF_V16SI:
33664 case V8QI_FTYPE_V8QI_V8QI:
33665 case V8QI_FTYPE_V4HI_V4HI:
33666 case V8HI_FTYPE_V8HI_V8HI:
33667 case V8HI_FTYPE_V16QI_V16QI:
33668 case V8HI_FTYPE_V4SI_V4SI:
33669 case V8SF_FTYPE_V8SF_V8SF:
33670 case V8SF_FTYPE_V8SF_V8SI:
33671 case V8DI_FTYPE_V8DI_V8DI:
33672 case V8DF_FTYPE_V8DF_V8DF:
33673 case V8DF_FTYPE_V8DF_V8DI:
33674 case V4SI_FTYPE_V4SI_V4SI:
33675 case V4SI_FTYPE_V8HI_V8HI:
33676 case V4SI_FTYPE_V4SF_V4SF:
33677 case V4SI_FTYPE_V2DF_V2DF:
33678 case V4HI_FTYPE_V4HI_V4HI:
33679 case V4HI_FTYPE_V8QI_V8QI:
33680 case V4HI_FTYPE_V2SI_V2SI:
33681 case V4DF_FTYPE_V4DF_V4DF:
33682 case V4DF_FTYPE_V4DF_V4DI:
33683 case V4SF_FTYPE_V4SF_V4SF:
33684 case V4SF_FTYPE_V4SF_V4SI:
33685 case V4SF_FTYPE_V4SF_V2SI:
33686 case V4SF_FTYPE_V4SF_V2DF:
33687 case V4SF_FTYPE_V4SF_UINT:
33688 case V4SF_FTYPE_V4SF_UINT64:
33689 case V4SF_FTYPE_V4SF_DI:
33690 case V4SF_FTYPE_V4SF_SI:
33691 case V2DI_FTYPE_V2DI_V2DI:
33692 case V2DI_FTYPE_V16QI_V16QI:
33693 case V2DI_FTYPE_V4SI_V4SI:
33694 case V2UDI_FTYPE_V4USI_V4USI:
33695 case V2DI_FTYPE_V2DI_V16QI:
33696 case V2DI_FTYPE_V2DF_V2DF:
33697 case V2SI_FTYPE_V2SI_V2SI:
33698 case V2SI_FTYPE_V4HI_V4HI:
33699 case V2SI_FTYPE_V2SF_V2SF:
33700 case V2DF_FTYPE_V2DF_V2DF:
33701 case V2DF_FTYPE_V2DF_V4SF:
33702 case V2DF_FTYPE_V2DF_V2DI:
33703 case V2DF_FTYPE_V2DF_DI:
33704 case V2DF_FTYPE_V2DF_SI:
33705 case V2DF_FTYPE_V2DF_UINT:
33706 case V2DF_FTYPE_V2DF_UINT64:
33707 case V2SF_FTYPE_V2SF_V2SF:
33708 case V1DI_FTYPE_V1DI_V1DI:
33709 case V1DI_FTYPE_V8QI_V8QI:
33710 case V1DI_FTYPE_V2SI_V2SI:
33711 case V32QI_FTYPE_V16HI_V16HI:
33712 case V16HI_FTYPE_V8SI_V8SI:
33713 case V32QI_FTYPE_V32QI_V32QI:
33714 case V16HI_FTYPE_V32QI_V32QI:
33715 case V16HI_FTYPE_V16HI_V16HI:
33716 case V8SI_FTYPE_V4DF_V4DF:
33717 case V8SI_FTYPE_V8SI_V8SI:
33718 case V8SI_FTYPE_V16HI_V16HI:
33719 case V4DI_FTYPE_V4DI_V4DI:
33720 case V4DI_FTYPE_V8SI_V8SI:
33721 case V4UDI_FTYPE_V8USI_V8USI:
33722 case QI_FTYPE_V8DI_V8DI:
33723 case HI_FTYPE_V16SI_V16SI:
33724 if (comparison == UNKNOWN)
33725 return ix86_expand_binop_builtin (icode, exp, target);
33726 nargs = 2;
33727 break;
33728 case V4SF_FTYPE_V4SF_V4SF_SWAP:
33729 case V2DF_FTYPE_V2DF_V2DF_SWAP:
33730 gcc_assert (comparison != UNKNOWN);
33731 nargs = 2;
33732 swap = true;
33733 break;
33734 case V16HI_FTYPE_V16HI_V8HI_COUNT:
33735 case V16HI_FTYPE_V16HI_SI_COUNT:
33736 case V8SI_FTYPE_V8SI_V4SI_COUNT:
33737 case V8SI_FTYPE_V8SI_SI_COUNT:
33738 case V4DI_FTYPE_V4DI_V2DI_COUNT:
33739 case V4DI_FTYPE_V4DI_INT_COUNT:
33740 case V8HI_FTYPE_V8HI_V8HI_COUNT:
33741 case V8HI_FTYPE_V8HI_SI_COUNT:
33742 case V4SI_FTYPE_V4SI_V4SI_COUNT:
33743 case V4SI_FTYPE_V4SI_SI_COUNT:
33744 case V4HI_FTYPE_V4HI_V4HI_COUNT:
33745 case V4HI_FTYPE_V4HI_SI_COUNT:
33746 case V2DI_FTYPE_V2DI_V2DI_COUNT:
33747 case V2DI_FTYPE_V2DI_SI_COUNT:
33748 case V2SI_FTYPE_V2SI_V2SI_COUNT:
33749 case V2SI_FTYPE_V2SI_SI_COUNT:
33750 case V1DI_FTYPE_V1DI_V1DI_COUNT:
33751 case V1DI_FTYPE_V1DI_SI_COUNT:
33752 nargs = 2;
33753 last_arg_count = true;
33754 break;
33755 case UINT64_FTYPE_UINT64_UINT64:
33756 case UINT_FTYPE_UINT_UINT:
33757 case UINT_FTYPE_UINT_USHORT:
33758 case UINT_FTYPE_UINT_UCHAR:
33759 case UINT16_FTYPE_UINT16_INT:
33760 case UINT8_FTYPE_UINT8_INT:
33761 case HI_FTYPE_HI_HI:
33762 case V16SI_FTYPE_V8DF_V8DF:
33763 nargs = 2;
33764 break;
33765 case V2DI_FTYPE_V2DI_INT_CONVERT:
33766 nargs = 2;
33767 rmode = V1TImode;
33768 nargs_constant = 1;
33769 break;
33770 case V4DI_FTYPE_V4DI_INT_CONVERT:
33771 nargs = 2;
33772 rmode = V2TImode;
33773 nargs_constant = 1;
33774 break;
33775 case V8HI_FTYPE_V8HI_INT:
33776 case V8HI_FTYPE_V8SF_INT:
33777 case V16HI_FTYPE_V16SF_INT:
33778 case V8HI_FTYPE_V4SF_INT:
33779 case V8SF_FTYPE_V8SF_INT:
33780 case V4SF_FTYPE_V16SF_INT:
33781 case V16SF_FTYPE_V16SF_INT:
33782 case V4SI_FTYPE_V4SI_INT:
33783 case V4SI_FTYPE_V8SI_INT:
33784 case V4HI_FTYPE_V4HI_INT:
33785 case V4DF_FTYPE_V4DF_INT:
33786 case V4DF_FTYPE_V8DF_INT:
33787 case V4SF_FTYPE_V4SF_INT:
33788 case V4SF_FTYPE_V8SF_INT:
33789 case V2DI_FTYPE_V2DI_INT:
33790 case V2DF_FTYPE_V2DF_INT:
33791 case V2DF_FTYPE_V4DF_INT:
33792 case V16HI_FTYPE_V16HI_INT:
33793 case V8SI_FTYPE_V8SI_INT:
33794 case V16SI_FTYPE_V16SI_INT:
33795 case V4SI_FTYPE_V16SI_INT:
33796 case V4DI_FTYPE_V4DI_INT:
33797 case V2DI_FTYPE_V4DI_INT:
33798 case V4DI_FTYPE_V8DI_INT:
33799 case HI_FTYPE_HI_INT:
33800 nargs = 2;
33801 nargs_constant = 1;
33802 break;
33803 case V16QI_FTYPE_V16QI_V16QI_V16QI:
33804 case V8SF_FTYPE_V8SF_V8SF_V8SF:
33805 case V4DF_FTYPE_V4DF_V4DF_V4DF:
33806 case V4SF_FTYPE_V4SF_V4SF_V4SF:
33807 case V2DF_FTYPE_V2DF_V2DF_V2DF:
33808 case V32QI_FTYPE_V32QI_V32QI_V32QI:
33809 case HI_FTYPE_V16SI_V16SI_HI:
33810 case QI_FTYPE_V8DI_V8DI_QI:
33811 case V16HI_FTYPE_V16SI_V16HI_HI:
33812 case V16QI_FTYPE_V16SI_V16QI_HI:
33813 case V16QI_FTYPE_V8DI_V16QI_QI:
33814 case V16SF_FTYPE_V16SF_V16SF_HI:
33815 case V16SF_FTYPE_V16SF_V16SF_V16SF:
33816 case V16SF_FTYPE_V16SF_V16SI_V16SF:
33817 case V16SF_FTYPE_V16SI_V16SF_HI:
33818 case V16SF_FTYPE_V16SI_V16SF_V16SF:
33819 case V16SF_FTYPE_V4SF_V16SF_HI:
33820 case V16SI_FTYPE_SI_V16SI_HI:
33821 case V16SI_FTYPE_V16HI_V16SI_HI:
33822 case V16SI_FTYPE_V16QI_V16SI_HI:
33823 case V16SI_FTYPE_V16SF_V16SI_HI:
33824 case V16SI_FTYPE_V16SI_V16SI_HI:
33825 case V16SI_FTYPE_V16SI_V16SI_V16SI:
33826 case V16SI_FTYPE_V4SI_V16SI_HI:
33827 case V2DI_FTYPE_V2DI_V2DI_V2DI:
33828 case V4DI_FTYPE_V4DI_V4DI_V4DI:
33829 case V8DF_FTYPE_V2DF_V8DF_QI:
33830 case V8DF_FTYPE_V4DF_V8DF_QI:
33831 case V8DF_FTYPE_V8DF_V8DF_QI:
33832 case V8DF_FTYPE_V8DF_V8DF_V8DF:
33833 case V8DF_FTYPE_V8DF_V8DI_V8DF:
33834 case V8DF_FTYPE_V8DI_V8DF_V8DF:
33835 case V8DF_FTYPE_V8SF_V8DF_QI:
33836 case V8DF_FTYPE_V8SI_V8DF_QI:
33837 case V8DI_FTYPE_DI_V8DI_QI:
33838 case V8DI_FTYPE_V16QI_V8DI_QI:
33839 case V8DI_FTYPE_V2DI_V8DI_QI:
33840 case V8DI_FTYPE_V4DI_V8DI_QI:
33841 case V8DI_FTYPE_V8DI_V8DI_QI:
33842 case V8DI_FTYPE_V8DI_V8DI_V8DI:
33843 case V8DI_FTYPE_V8HI_V8DI_QI:
33844 case V8DI_FTYPE_V8SI_V8DI_QI:
33845 case V8HI_FTYPE_V8DI_V8HI_QI:
33846 case V8SF_FTYPE_V8DF_V8SF_QI:
33847 case V8SI_FTYPE_V8DF_V8SI_QI:
33848 case V8SI_FTYPE_V8DI_V8SI_QI:
33849 case V4SI_FTYPE_V4SI_V4SI_V4SI:
33850 nargs = 3;
33851 break;
33852 case V32QI_FTYPE_V32QI_V32QI_INT:
33853 case V16HI_FTYPE_V16HI_V16HI_INT:
33854 case V16QI_FTYPE_V16QI_V16QI_INT:
33855 case V4DI_FTYPE_V4DI_V4DI_INT:
33856 case V8HI_FTYPE_V8HI_V8HI_INT:
33857 case V8SI_FTYPE_V8SI_V8SI_INT:
33858 case V8SI_FTYPE_V8SI_V4SI_INT:
33859 case V8SF_FTYPE_V8SF_V8SF_INT:
33860 case V8SF_FTYPE_V8SF_V4SF_INT:
33861 case V4SI_FTYPE_V4SI_V4SI_INT:
33862 case V4DF_FTYPE_V4DF_V4DF_INT:
33863 case V16SF_FTYPE_V16SF_V16SF_INT:
33864 case V16SF_FTYPE_V16SF_V4SF_INT:
33865 case V16SI_FTYPE_V16SI_V4SI_INT:
33866 case V4DF_FTYPE_V4DF_V2DF_INT:
33867 case V4SF_FTYPE_V4SF_V4SF_INT:
33868 case V2DI_FTYPE_V2DI_V2DI_INT:
33869 case V4DI_FTYPE_V4DI_V2DI_INT:
33870 case V2DF_FTYPE_V2DF_V2DF_INT:
33871 case QI_FTYPE_V8DI_V8DI_INT:
33872 case QI_FTYPE_V8DF_V8DF_INT:
33873 case QI_FTYPE_V2DF_V2DF_INT:
33874 case QI_FTYPE_V4SF_V4SF_INT:
33875 case HI_FTYPE_V16SI_V16SI_INT:
33876 case HI_FTYPE_V16SF_V16SF_INT:
33877 nargs = 3;
33878 nargs_constant = 1;
33879 break;
33880 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
33881 nargs = 3;
33882 rmode = V4DImode;
33883 nargs_constant = 1;
33884 break;
33885 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
33886 nargs = 3;
33887 rmode = V2DImode;
33888 nargs_constant = 1;
33889 break;
33890 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
33891 nargs = 3;
33892 rmode = DImode;
33893 nargs_constant = 1;
33894 break;
33895 case V2DI_FTYPE_V2DI_UINT_UINT:
33896 nargs = 3;
33897 nargs_constant = 2;
33898 break;
33899 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI:
33900 case V16SF_FTYPE_V16SF_V16SI_V16SF_HI:
33901 case V16SF_FTYPE_V16SI_V16SF_V16SF_HI:
33902 case V16SI_FTYPE_V16SI_V16SI_V16SI_HI:
33903 case V16SI_FTYPE_V16SI_V4SI_V16SI_HI:
33904 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI:
33905 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI:
33906 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI:
33907 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI:
33908 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI:
33909 case V8DF_FTYPE_V8DF_V8DI_V8DF_QI:
33910 case V8DF_FTYPE_V8DI_V8DF_V8DF_QI:
33911 case V8DI_FTYPE_V16SI_V16SI_V8DI_QI:
33912 case V8DI_FTYPE_V8DI_SI_V8DI_V8DI:
33913 case V8DI_FTYPE_V8DI_V2DI_V8DI_QI:
33914 case V8DI_FTYPE_V8DI_V8DI_V8DI_QI:
33915 nargs = 4;
33916 break;
33917 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
33918 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
33919 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
33920 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
33921 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT:
33922 nargs = 4;
33923 nargs_constant = 1;
33924 break;
33925 case QI_FTYPE_V2DF_V2DF_INT_QI:
33926 case QI_FTYPE_V4SF_V4SF_INT_QI:
33927 nargs = 4;
33928 mask_pos = 1;
33929 nargs_constant = 1;
33930 break;
33931 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
33932 nargs = 4;
33933 nargs_constant = 2;
33934 break;
33935 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
33936 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
33937 nargs = 4;
33938 break;
33939 case QI_FTYPE_V8DI_V8DI_INT_QI:
33940 case HI_FTYPE_V16SI_V16SI_INT_HI:
33941 case QI_FTYPE_V8DF_V8DF_INT_QI:
33942 case HI_FTYPE_V16SF_V16SF_INT_HI:
33943 mask_pos = 1;
33944 nargs = 4;
33945 nargs_constant = 1;
33946 break;
33947 case V8DF_FTYPE_V8DF_INT_V8DF_QI:
33948 case V16SF_FTYPE_V16SF_INT_V16SF_HI:
33949 case V16HI_FTYPE_V16SF_INT_V16HI_HI:
33950 case V16SI_FTYPE_V16SI_INT_V16SI_HI:
33951 case V4SI_FTYPE_V16SI_INT_V4SI_QI:
33952 case V4DI_FTYPE_V8DI_INT_V4DI_QI:
33953 case V4DF_FTYPE_V8DF_INT_V4DF_QI:
33954 case V4SF_FTYPE_V16SF_INT_V4SF_QI:
33955 case V8DI_FTYPE_V8DI_INT_V8DI_QI:
33956 nargs = 4;
33957 mask_pos = 2;
33958 nargs_constant = 1;
33959 break;
33960 case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI:
33961 case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI:
33962 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI:
33963 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI:
33964 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI:
33965 case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI:
33966 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI:
33967 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI:
33968 case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI:
33969 case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI:
33970 nargs = 5;
33971 mask_pos = 2;
33972 nargs_constant = 1;
33973 break;
33974 case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI:
33975 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI:
33976 case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI:
33977 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI:
33978 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI:
33979 nargs = 5;
33980 mask_pos = 1;
33981 nargs_constant = 1;
33982 break;
33984 default:
33985 gcc_unreachable ();
33988 gcc_assert (nargs <= ARRAY_SIZE (args));
33990 if (comparison != UNKNOWN)
33992 gcc_assert (nargs == 2);
33993 return ix86_expand_sse_compare (d, exp, target, swap);
33996 if (rmode == VOIDmode || rmode == tmode)
33998 if (optimize
33999 || target == 0
34000 || GET_MODE (target) != tmode
34001 || !insn_p->operand[0].predicate (target, tmode))
34002 target = gen_reg_rtx (tmode);
34003 real_target = target;
34005 else
34007 real_target = gen_reg_rtx (tmode);
34008 target = simplify_gen_subreg (rmode, real_target, tmode, 0);
34011 for (i = 0; i < nargs; i++)
34013 tree arg = CALL_EXPR_ARG (exp, i);
34014 rtx op = expand_normal (arg);
34015 enum machine_mode mode = insn_p->operand[i + 1].mode;
34016 bool match = insn_p->operand[i + 1].predicate (op, mode);
34018 if (last_arg_count && (i + 1) == nargs)
34020 /* SIMD shift insns take either an 8-bit immediate or
34021 register as count. But builtin functions take int as
34022 count. If count doesn't match, we put it in register. */
34023 if (!match)
34025 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
34026 if (!insn_p->operand[i + 1].predicate (op, mode))
34027 op = copy_to_reg (op);
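/* Immediate operands: with no mask, the last NARGS_CONSTANT arguments
   must be immediates; with a mask, the immediate operand is followed by
   MASK_POS trailing merge/mask operands.  */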
34030 else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
34031 (!mask_pos && (nargs - i) <= nargs_constant))
34033 if (!match)
34034 switch (icode)
34036 case CODE_FOR_avx2_inserti128:
34037 case CODE_FOR_avx2_extracti128:
34038 error ("the last argument must be a 1-bit immediate");
34039 return const0_rtx;
34041 case CODE_FOR_avx512f_cmpv8di3_mask:
34042 case CODE_FOR_avx512f_cmpv16si3_mask:
34043 case CODE_FOR_avx512f_ucmpv8di3_mask:
34044 case CODE_FOR_avx512f_ucmpv16si3_mask:
34045 error ("the last argument must be a 3-bit immediate");
34046 return const0_rtx;
34048 case CODE_FOR_sse4_1_roundsd:
34049 case CODE_FOR_sse4_1_roundss:
34051 case CODE_FOR_sse4_1_roundpd:
34052 case CODE_FOR_sse4_1_roundps:
34053 case CODE_FOR_avx_roundpd256:
34054 case CODE_FOR_avx_roundps256:
34056 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
34057 case CODE_FOR_sse4_1_roundps_sfix:
34058 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
34059 case CODE_FOR_avx_roundps_sfix256:
34061 case CODE_FOR_sse4_1_blendps:
34062 case CODE_FOR_avx_blendpd256:
34063 case CODE_FOR_avx_vpermilv4df:
34064 case CODE_FOR_avx512f_getmantv8df_mask:
34065 case CODE_FOR_avx512f_getmantv16sf_mask:
34066 error ("the last argument must be a 4-bit immediate");
34067 return const0_rtx;
34069 case CODE_FOR_sha1rnds4:
34070 case CODE_FOR_sse4_1_blendpd:
34071 case CODE_FOR_avx_vpermilv2df:
34072 case CODE_FOR_xop_vpermil2v2df3:
34073 case CODE_FOR_xop_vpermil2v4sf3:
34074 case CODE_FOR_xop_vpermil2v4df3:
34075 case CODE_FOR_xop_vpermil2v8sf3:
34076 case CODE_FOR_avx512f_vinsertf32x4_mask:
34077 case CODE_FOR_avx512f_vinserti32x4_mask:
34078 case CODE_FOR_avx512f_vextractf32x4_mask:
34079 case CODE_FOR_avx512f_vextracti32x4_mask:
34080 error ("the last argument must be a 2-bit immediate");
34081 return const0_rtx;
34083 case CODE_FOR_avx_vextractf128v4df:
34084 case CODE_FOR_avx_vextractf128v8sf:
34085 case CODE_FOR_avx_vextractf128v8si:
34086 case CODE_FOR_avx_vinsertf128v4df:
34087 case CODE_FOR_avx_vinsertf128v8sf:
34088 case CODE_FOR_avx_vinsertf128v8si:
34089 case CODE_FOR_avx512f_vinsertf64x4_mask:
34090 case CODE_FOR_avx512f_vinserti64x4_mask:
34091 case CODE_FOR_avx512f_vextractf64x4_mask:
34092 case CODE_FOR_avx512f_vextracti64x4_mask:
34093 error ("the last argument must be a 1-bit immediate");
34094 return const0_rtx;
34096 case CODE_FOR_avx_vmcmpv2df3:
34097 case CODE_FOR_avx_vmcmpv4sf3:
34098 case CODE_FOR_avx_cmpv2df3:
34099 case CODE_FOR_avx_cmpv4sf3:
34100 case CODE_FOR_avx_cmpv4df3:
34101 case CODE_FOR_avx_cmpv8sf3:
34102 case CODE_FOR_avx512f_cmpv8df3_mask:
34103 case CODE_FOR_avx512f_cmpv16sf3_mask:
34104 case CODE_FOR_avx512f_vmcmpv2df3_mask:
34105 case CODE_FOR_avx512f_vmcmpv4sf3_mask:
34106 error ("the last argument must be a 5-bit immediate");
34107 return const0_rtx;
34109 default:
34110 switch (nargs_constant)
34112 case 2:
34113 if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
34114 (!mask_pos && (nargs - i) == nargs_constant))
34116 error ("the next to last argument must be an 8-bit immediate");
34117 break;
34119 case 1:
34120 error ("the last argument must be an 8-bit immediate");
34121 break;
34122 default:
34123 gcc_unreachable ();
34125 return const0_rtx;
34128 else
34130 if (VECTOR_MODE_P (mode))
34131 op = safe_vector_operand (op, mode);
34133 /* If we aren't optimizing, only allow one memory operand to
34134 be generated. */
34135 if (memory_operand (op, mode))
34136 num_memory++;
34138 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34140 if (optimize || !match || num_memory > 1)
34141 op = copy_to_mode_reg (mode, op);
34143 else
34145 op = copy_to_reg (op);
34146 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
34150 args[i].op = op;
34151 args[i].mode = mode;
34154 switch (nargs)
34156 case 1:
34157 pat = GEN_FCN (icode) (real_target, args[0].op);
34158 break;
34159 case 2:
34160 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
34161 break;
34162 case 3:
34163 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34164 args[2].op);
34165 break;
34166 case 4:
34167 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34168 args[2].op, args[3].op);
34169 break;
34170 case 5:
34171 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34172 args[2].op, args[3].op, args[4].op);
break;
34173 case 6:
34174 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34175 args[2].op, args[3].op, args[4].op,
34176 args[5].op);
34177 break;
34178 default:
34179 gcc_unreachable ();
34182 if (! pat)
34183 return 0;
34185 emit_insn (pat);
34186 return target;
34189 /* Transform pattern of following layout:
34190 (parallel [
34191 set (A B)
34192 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)])
34194 into:
34195 (set (A B))
34198 (parallel [ A B
 ...
34200 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
 ...
 ])
34203 into:
34204 (parallel [ A B ... ]) */
34206 static rtx
34207 ix86_erase_embedded_rounding (rtx pat)
34209 if (GET_CODE (pat) == INSN)
34210 pat = PATTERN (pat);
34212 gcc_assert (GET_CODE (pat) == PARALLEL);
34214 if (XVECLEN (pat, 0) == 2)
34216 rtx p0 = XVECEXP (pat, 0, 0);
34217 rtx p1 = XVECEXP (pat, 0, 1);
34219 gcc_assert (GET_CODE (p0) == SET
34220 && GET_CODE (p1) == UNSPEC
34221 && XINT (p1, 1) == UNSPEC_EMBEDDED_ROUNDING);
34223 return p0;
34225 else
34227 rtx *res = XALLOCAVEC (rtx, XVECLEN (pat, 0));
34228 int i = 0;
34229 int j = 0;
34231 for (; i < XVECLEN (pat, 0); ++i)
34233 rtx elem = XVECEXP (pat, 0, i);
34234 if (GET_CODE (elem) != UNSPEC
34235 || XINT (elem, 1) != UNSPEC_EMBEDDED_ROUNDING)
34236 res [j++] = elem;
34239 /* No more than 1 occurrence was removed. */
34240 gcc_assert (j >= XVECLEN (pat, 0) - 1);
34242 return gen_rtx_PARALLEL (GET_MODE (pat), gen_rtvec_v (j, res));
34246 /* Subroutine of ix86_expand_round_builtin to take care of comi insns
34247 with rounding. */
34248 static rtx
34249 ix86_expand_sse_comi_round (const struct builtin_description *d,
34250 tree exp, rtx target)
34252 rtx pat, set_dst;
34253 tree arg0 = CALL_EXPR_ARG (exp, 0);
34254 tree arg1 = CALL_EXPR_ARG (exp, 1);
34255 tree arg2 = CALL_EXPR_ARG (exp, 2);
34256 tree arg3 = CALL_EXPR_ARG (exp, 3);
34257 rtx op0 = expand_normal (arg0);
34258 rtx op1 = expand_normal (arg1);
34259 rtx op2 = expand_normal (arg2);
34260 rtx op3 = expand_normal (arg3);
34261 enum insn_code icode = d->icode;
34262 const struct insn_data_d *insn_p = &insn_data[icode];
34263 enum machine_mode mode0 = insn_p->operand[0].mode;
34264 enum machine_mode mode1 = insn_p->operand[1].mode;
34265 enum rtx_code comparison = UNEQ;
34266 bool need_ucomi = false;
34268 /* See avxintrin.h for values. */
34269 enum rtx_code comi_comparisons[32] =
34271 UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
34272 UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
34273 UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
34275 bool need_ucomi_values[32] =
34277 true, false, false, true, true, false, false, true,
34278 true, false, false, true, true, false, false, true,
34279 false, true, true, false, false, true, true, false,
34280 false, true, true, false, false, true, true, false
34283 if (!CONST_INT_P (op2))
34285 error ("the third argument must be a comparison constant");
34286 return const0_rtx;
34288 if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
34290 error ("incorrect comparison mode");
34291 return const0_rtx;
34294 if (!insn_p->operand[2].predicate (op3, SImode))
34296 error ("incorrect rounding operand");
34297 return const0_rtx;
34300 comparison = comi_comparisons[INTVAL (op2)];
34301 need_ucomi = need_ucomi_values[INTVAL (op2)];
34303 if (VECTOR_MODE_P (mode0))
34304 op0 = safe_vector_operand (op0, mode0);
34305 if (VECTOR_MODE_P (mode1))
34306 op1 = safe_vector_operand (op1, mode1);
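/* Build the result in the low byte of a cleared SImode register: the
   comparison below writes only the QImode low part, so SUBREG_REG at
   the end yields an already zero-extended SImode value.  */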
34308 target = gen_reg_rtx (SImode);
34309 emit_move_insn (target, const0_rtx);
34310 target = gen_rtx_SUBREG (QImode, target, 0);
34312 if ((optimize && !register_operand (op0, mode0))
34313 || !insn_p->operand[0].predicate (op0, mode0))
34314 op0 = copy_to_mode_reg (mode0, op0);
34315 if ((optimize && !register_operand (op1, mode1))
34316 || !insn_p->operand[1].predicate (op1, mode1))
34317 op1 = copy_to_mode_reg (mode1, op1);
34319 if (need_ucomi)
34320 icode = icode == CODE_FOR_sse_comi_round
34321 ? CODE_FOR_sse_ucomi_round
34322 : CODE_FOR_sse2_ucomi_round;
34324 pat = GEN_FCN (icode) (op0, op1, op3);
34325 if (! pat)
34326 return 0;
34328 /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point. */
34329 if (INTVAL (op3) == NO_ROUND)
34331 pat = ix86_erase_embedded_rounding (pat);
34332 if (! pat)
34333 return 0;
34335 set_dst = SET_DEST (pat);
34337 else
34339 gcc_assert (GET_CODE (XVECEXP (pat, 0, 0)) == SET);
34340 set_dst = SET_DEST (XVECEXP (pat, 0, 0));
34343 emit_insn (pat);
34344 emit_insn (gen_rtx_SET (VOIDmode,
34345 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
34346 gen_rtx_fmt_ee (comparison, QImode,
34347 set_dst,
34348 const0_rtx)));
34350 return SUBREG_REG (target);
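/* Subroutine of ix86_expand_builtin to take care of builtins whose last
   argument is an explicit rounding or SAE control.  */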
34353 static rtx
34354 ix86_expand_round_builtin (const struct builtin_description *d,
34355 tree exp, rtx target)
34357 rtx pat;
34358 unsigned int i, nargs;
34359 struct
34361 rtx op;
34362 enum machine_mode mode;
34363 } args[6];
34364 enum insn_code icode = d->icode;
34365 const struct insn_data_d *insn_p = &insn_data[icode];
34366 enum machine_mode tmode = insn_p->operand[0].mode;
34367 unsigned int nargs_constant = 0;
34368 unsigned int redundant_embed_rnd = 0;
34370 switch ((enum ix86_builtin_func_type) d->flag)
34372 case UINT64_FTYPE_V2DF_INT:
34373 case UINT64_FTYPE_V4SF_INT:
34374 case UINT_FTYPE_V2DF_INT:
34375 case UINT_FTYPE_V4SF_INT:
34376 case INT64_FTYPE_V2DF_INT:
34377 case INT64_FTYPE_V4SF_INT:
34378 case INT_FTYPE_V2DF_INT:
34379 case INT_FTYPE_V4SF_INT:
34380 nargs = 2;
34381 break;
34382 case V4SF_FTYPE_V4SF_UINT_INT:
34383 case V4SF_FTYPE_V4SF_UINT64_INT:
34384 case V2DF_FTYPE_V2DF_UINT64_INT:
34385 case V4SF_FTYPE_V4SF_INT_INT:
34386 case V4SF_FTYPE_V4SF_INT64_INT:
34387 case V2DF_FTYPE_V2DF_INT64_INT:
34388 case V4SF_FTYPE_V4SF_V4SF_INT:
34389 case V2DF_FTYPE_V2DF_V2DF_INT:
34390 case V4SF_FTYPE_V4SF_V2DF_INT:
34391 case V2DF_FTYPE_V2DF_V4SF_INT:
34392 nargs = 3;
34393 break;
34394 case V8SF_FTYPE_V8DF_V8SF_QI_INT:
34395 case V8DF_FTYPE_V8DF_V8DF_QI_INT:
34396 case V8SI_FTYPE_V8DF_V8SI_QI_INT:
34397 case V16SF_FTYPE_V16SF_V16SF_HI_INT:
34398 case V16SF_FTYPE_V16SI_V16SF_HI_INT:
34399 case V16SI_FTYPE_V16SF_V16SI_HI_INT:
34400 case V8DF_FTYPE_V8SF_V8DF_QI_INT:
34401 case V16SF_FTYPE_V16HI_V16SF_HI_INT:
34402 case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
34403 case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
34404 nargs = 4;
34405 break;
34406 case V4SF_FTYPE_V4SF_V4SF_INT_INT:
34407 case V2DF_FTYPE_V2DF_V2DF_INT_INT:
34408 nargs_constant = 2;
34409 nargs = 4;
34410 break;
34411 case INT_FTYPE_V4SF_V4SF_INT_INT:
34412 case INT_FTYPE_V2DF_V2DF_INT_INT:
34413 return ix86_expand_sse_comi_round (d, exp, target);
34414 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT:
34415 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
34416 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
34417 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
34418 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
34419 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
34420 nargs = 5;
34421 break;
34422 case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
34423 case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
34424 nargs_constant = 4;
34425 nargs = 5;
34426 break;
34427 case QI_FTYPE_V8DF_V8DF_INT_QI_INT:
34428 case QI_FTYPE_V2DF_V2DF_INT_QI_INT:
34429 case HI_FTYPE_V16SF_V16SF_INT_HI_INT:
34430 case QI_FTYPE_V4SF_V4SF_INT_QI_INT:
34431 nargs_constant = 3;
34432 nargs = 5;
34433 break;
34434 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
34435 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
34436 nargs = 6;
34437 nargs_constant = 4;
34438 break;
34439 case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT:
34440 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT:
34441 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT:
34442 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT:
34443 nargs = 6;
34444 nargs_constant = 3;
34445 break;
34446 default:
34447 gcc_unreachable ();
34449 gcc_assert (nargs <= ARRAY_SIZE (args));
34451 if (optimize
34452 || target == 0
34453 || GET_MODE (target) != tmode
34454 || !insn_p->operand[0].predicate (target, tmode))
34455 target = gen_reg_rtx (tmode);
34457 for (i = 0; i < nargs; i++)
34459 tree arg = CALL_EXPR_ARG (exp, i);
34460 rtx op = expand_normal (arg);
34461 enum machine_mode mode = insn_p->operand[i + 1].mode;
34462 bool match = insn_p->operand[i + 1].predicate (op, mode);
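/* The argument NARGS_CONSTANT positions from the end, when any, must be
   an immediate; the final argument is always the rounding/SAE control.  */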
34464 if (i == nargs - nargs_constant)
34466 if (!match)
34468 switch (icode)
34470 case CODE_FOR_avx512f_getmantv8df_mask_round:
34471 case CODE_FOR_avx512f_getmantv16sf_mask_round:
34472 case CODE_FOR_avx512f_getmantv2df_round:
34473 case CODE_FOR_avx512f_getmantv4sf_round:
34474 error ("the immediate argument must be a 4-bit immediate");
34475 return const0_rtx;
34476 case CODE_FOR_avx512f_cmpv8df3_mask_round:
34477 case CODE_FOR_avx512f_cmpv16sf3_mask_round:
34478 case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
34479 case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
34480 error ("the immediate argument must be a 5-bit immediate");
34481 return const0_rtx;
34482 default:
34483 error ("the immediate argument must be an 8-bit immediate");
34484 return const0_rtx;
34488 else if (i == nargs-1)
34490 if (!insn_p->operand[nargs].predicate (op, SImode))
34492 error ("incorrect rounding operand");
34493 return const0_rtx;
34496 /* If there is no rounding, use the normal version of the pattern. */
34497 if (INTVAL (op) == NO_ROUND)
34498 redundant_embed_rnd = 1;
34500 else
34502 if (VECTOR_MODE_P (mode))
34503 op = safe_vector_operand (op, mode);
34505 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34507 if (optimize || !match)
34508 op = copy_to_mode_reg (mode, op);
34510 else
34512 op = copy_to_reg (op);
34513 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
34517 args[i].op = op;
34518 args[i].mode = mode;
34521 switch (nargs)
34523 case 1:
34524 pat = GEN_FCN (icode) (target, args[0].op);
34525 break;
34526 case 2:
34527 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
34528 break;
34529 case 3:
34530 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34531 args[2].op);
34532 break;
34533 case 4:
34534 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34535 args[2].op, args[3].op);
34536 break;
34537 case 5:
34538 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34539 args[2].op, args[3].op, args[4].op);
break;
34540 case 6:
34541 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34542 args[2].op, args[3].op, args[4].op,
34543 args[5].op);
34544 break;
34545 default:
34546 gcc_unreachable ();
34549 if (!pat)
34550 return 0;
34552 if (redundant_embed_rnd)
34553 pat = ix86_erase_embedded_rounding (pat);
34555 emit_insn (pat);
34556 return target;
34559 /* Subroutine of ix86_expand_builtin to take care of special insns
34560 with variable number of operands. */
34562 static rtx
34563 ix86_expand_special_args_builtin (const struct builtin_description *d,
34564 tree exp, rtx target)
34566 tree arg;
34567 rtx pat, op;
34568 unsigned int i, nargs, arg_adjust, memory;
34569 bool aligned_mem = false;
34570 struct
34572 rtx op;
34573 enum machine_mode mode;
34574 } args[3];
34575 enum insn_code icode = d->icode;
34576 bool last_arg_constant = false;
34577 const struct insn_data_d *insn_p = &insn_data[icode];
34578 enum machine_mode tmode = insn_p->operand[0].mode;
34579 enum { load, store } klass;
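/* LOAD-class builtins return their result in TARGET; for STORE-class
   builtins the first call argument supplies operand 0 of the insn
   (usually a memory destination) and nothing is returned.  */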
34581 switch ((enum ix86_builtin_func_type) d->flag)
34583 case VOID_FTYPE_VOID:
34584 emit_insn (GEN_FCN (icode) (target));
34585 return 0;
34586 case VOID_FTYPE_UINT64:
34587 case VOID_FTYPE_UNSIGNED:
34588 nargs = 0;
34589 klass = store;
34590 memory = 0;
34591 break;
34593 case INT_FTYPE_VOID:
34594 case UINT64_FTYPE_VOID:
34595 case UNSIGNED_FTYPE_VOID:
34596 nargs = 0;
34597 klass = load;
34598 memory = 0;
34599 break;
34600 case UINT64_FTYPE_PUNSIGNED:
34601 case V2DI_FTYPE_PV2DI:
34602 case V4DI_FTYPE_PV4DI:
34603 case V32QI_FTYPE_PCCHAR:
34604 case V16QI_FTYPE_PCCHAR:
34605 case V8SF_FTYPE_PCV4SF:
34606 case V8SF_FTYPE_PCFLOAT:
34607 case V4SF_FTYPE_PCFLOAT:
34608 case V4DF_FTYPE_PCV2DF:
34609 case V4DF_FTYPE_PCDOUBLE:
34610 case V2DF_FTYPE_PCDOUBLE:
34611 case VOID_FTYPE_PVOID:
34612 case V16SI_FTYPE_PV4SI:
34613 case V16SF_FTYPE_PV4SF:
34614 case V8DI_FTYPE_PV4DI:
34615 case V8DI_FTYPE_PV8DI:
34616 case V8DF_FTYPE_PV4DF:
34617 nargs = 1;
34618 klass = load;
34619 memory = 0;
34620 switch (icode)
34622 case CODE_FOR_sse4_1_movntdqa:
34623 case CODE_FOR_avx2_movntdqa:
34624 case CODE_FOR_avx512f_movntdqa:
34625 aligned_mem = true;
34626 break;
34627 default:
34628 break;
34630 break;
34631 case VOID_FTYPE_PV2SF_V4SF:
34632 case VOID_FTYPE_PV8DI_V8DI:
34633 case VOID_FTYPE_PV4DI_V4DI:
34634 case VOID_FTYPE_PV2DI_V2DI:
34635 case VOID_FTYPE_PCHAR_V32QI:
34636 case VOID_FTYPE_PCHAR_V16QI:
34637 case VOID_FTYPE_PFLOAT_V16SF:
34638 case VOID_FTYPE_PFLOAT_V8SF:
34639 case VOID_FTYPE_PFLOAT_V4SF:
34640 case VOID_FTYPE_PDOUBLE_V8DF:
34641 case VOID_FTYPE_PDOUBLE_V4DF:
34642 case VOID_FTYPE_PDOUBLE_V2DF:
34643 case VOID_FTYPE_PLONGLONG_LONGLONG:
34644 case VOID_FTYPE_PULONGLONG_ULONGLONG:
34645 case VOID_FTYPE_PINT_INT:
34646 nargs = 1;
34647 klass = store;
34648 /* Reserve memory operand for target. */
34649 memory = ARRAY_SIZE (args);
34650 switch (icode)
34652 /* These builtins and instructions require the memory
34653 to be properly aligned. */
34654 case CODE_FOR_avx_movntv4di:
34655 case CODE_FOR_sse2_movntv2di:
34656 case CODE_FOR_avx_movntv8sf:
34657 case CODE_FOR_sse_movntv4sf:
34658 case CODE_FOR_sse4a_vmmovntv4sf:
34659 case CODE_FOR_avx_movntv4df:
34660 case CODE_FOR_sse2_movntv2df:
34661 case CODE_FOR_sse4a_vmmovntv2df:
34662 case CODE_FOR_sse2_movntidi:
34663 case CODE_FOR_sse_movntq:
34664 case CODE_FOR_sse2_movntisi:
34665 case CODE_FOR_avx512f_movntv16sf:
34666 case CODE_FOR_avx512f_movntv8df:
34667 case CODE_FOR_avx512f_movntv8di:
34668 aligned_mem = true;
34669 break;
34670 default:
34671 break;
34673 break;
34674 case V4SF_FTYPE_V4SF_PCV2SF:
34675 case V2DF_FTYPE_V2DF_PCDOUBLE:
34676 nargs = 2;
34677 klass = load;
34678 memory = 1;
34679 break;
34680 case V8SF_FTYPE_PCV8SF_V8SI:
34681 case V4DF_FTYPE_PCV4DF_V4DI:
34682 case V4SF_FTYPE_PCV4SF_V4SI:
34683 case V2DF_FTYPE_PCV2DF_V2DI:
34684 case V8SI_FTYPE_PCV8SI_V8SI:
34685 case V4DI_FTYPE_PCV4DI_V4DI:
34686 case V4SI_FTYPE_PCV4SI_V4SI:
34687 case V2DI_FTYPE_PCV2DI_V2DI:
34688 nargs = 2;
34689 klass = load;
34690 memory = 0;
34691 break;
34692 case VOID_FTYPE_PV8DF_V8DF_QI:
34693 case VOID_FTYPE_PV16SF_V16SF_HI:
34694 case VOID_FTYPE_PV8DI_V8DI_QI:
34695 case VOID_FTYPE_PV16SI_V16SI_HI:
34696 switch (icode)
34698 /* These builtins and instructions require the memory
34699 to be properly aligned. */
34700 case CODE_FOR_avx512f_storev16sf_mask:
34701 case CODE_FOR_avx512f_storev16si_mask:
34702 case CODE_FOR_avx512f_storev8df_mask:
34703 case CODE_FOR_avx512f_storev8di_mask:
34704 aligned_mem = true;
34705 break;
34706 default:
34707 break;
34709 /* FALLTHRU */
34710 case VOID_FTYPE_PV8SF_V8SI_V8SF:
34711 case VOID_FTYPE_PV4DF_V4DI_V4DF:
34712 case VOID_FTYPE_PV4SF_V4SI_V4SF:
34713 case VOID_FTYPE_PV2DF_V2DI_V2DF:
34714 case VOID_FTYPE_PV8SI_V8SI_V8SI:
34715 case VOID_FTYPE_PV4DI_V4DI_V4DI:
34716 case VOID_FTYPE_PV4SI_V4SI_V4SI:
34717 case VOID_FTYPE_PV2DI_V2DI_V2DI:
34718 case VOID_FTYPE_PDOUBLE_V2DF_QI:
34719 case VOID_FTYPE_PFLOAT_V4SF_QI:
34720 case VOID_FTYPE_PV8SI_V8DI_QI:
34721 case VOID_FTYPE_PV8HI_V8DI_QI:
34722 case VOID_FTYPE_PV16HI_V16SI_HI:
34723 case VOID_FTYPE_PV16QI_V8DI_QI:
34724 case VOID_FTYPE_PV16QI_V16SI_HI:
34725 nargs = 2;
34726 klass = store;
34727 /* Reserve memory operand for target. */
34728 memory = ARRAY_SIZE (args);
34729 break;
34730 case V16SF_FTYPE_PCV16SF_V16SF_HI:
34731 case V16SI_FTYPE_PCV16SI_V16SI_HI:
34732 case V8DF_FTYPE_PCV8DF_V8DF_QI:
34733 case V8DI_FTYPE_PCV8DI_V8DI_QI:
34734 case V2DF_FTYPE_PCDOUBLE_V2DF_QI:
34735 case V4SF_FTYPE_PCFLOAT_V4SF_QI:
34736 nargs = 3;
34737 klass = load;
34738 memory = 0;
34739 switch (icode)
34741 /* These builtins and instructions require the memory
34742 to be properly aligned. */
34743 case CODE_FOR_avx512f_loadv16sf_mask:
34744 case CODE_FOR_avx512f_loadv16si_mask:
34745 case CODE_FOR_avx512f_loadv8df_mask:
34746 case CODE_FOR_avx512f_loadv8di_mask:
34747 aligned_mem = true;
34748 break;
34749 default:
34750 break;
34752 break;
34753 case VOID_FTYPE_UINT_UINT_UINT:
34754 case VOID_FTYPE_UINT64_UINT_UINT:
34755 case UCHAR_FTYPE_UINT_UINT_UINT:
34756 case UCHAR_FTYPE_UINT64_UINT_UINT:
34757 nargs = 3;
34758 klass = load;
34759 memory = ARRAY_SIZE (args);
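/* None of the expanded arguments is a memory operand here; MEMORY is
   set past the last index so the loop below never treats one as such.  */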
34760 last_arg_constant = true;
34761 break;
34762 default:
34763 gcc_unreachable ();
34766 gcc_assert (nargs <= ARRAY_SIZE (args));
34768 if (klass == store)
34770 arg = CALL_EXPR_ARG (exp, 0);
34771 op = expand_normal (arg);
34772 gcc_assert (target == 0);
34773 if (memory)
34775 op = ix86_zero_extend_to_Pmode (op);
34776 target = gen_rtx_MEM (tmode, op);
34777 /* target at this point has just BITS_PER_UNIT MEM_ALIGN
34778 on it. Try to improve it using get_pointer_alignment,
34779 and if the special builtin is one that requires strict
34780 mode alignment, also from its GET_MODE_ALIGNMENT.
34781 Failure to do so could lead to ix86_legitimate_combined_insn
34782 rejecting all changes to such insns. */
34783 unsigned int align = get_pointer_alignment (arg);
34784 if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
34785 align = GET_MODE_ALIGNMENT (tmode);
34786 if (MEM_ALIGN (target) < align)
34787 set_mem_align (target, align);
34789 else
34790 target = force_reg (tmode, op);
34791 arg_adjust = 1;
34793 else
34795 arg_adjust = 0;
34796 if (optimize
34797 || target == 0
34798 || !register_operand (target, tmode)
34799 || GET_MODE (target) != tmode)
34800 target = gen_reg_rtx (tmode);
34803 for (i = 0; i < nargs; i++)
34805 enum machine_mode mode = insn_p->operand[i + 1].mode;
34806 bool match;
34808 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
34809 op = expand_normal (arg);
34810 match = insn_p->operand[i + 1].predicate (op, mode);
34812 if (last_arg_constant && (i + 1) == nargs)
34814 if (!match)
34816 if (icode == CODE_FOR_lwp_lwpvalsi3
34817 || icode == CODE_FOR_lwp_lwpinssi3
34818 || icode == CODE_FOR_lwp_lwpvaldi3
34819 || icode == CODE_FOR_lwp_lwpinsdi3)
34820 error ("the last argument must be a 32-bit immediate");
34821 else
34822 error ("the last argument must be an 8-bit immediate");
34823 return const0_rtx;
34826 else
34828 if (i == memory)
34830 /* This must be the memory operand. */
34831 op = ix86_zero_extend_to_Pmode (op);
34832 op = gen_rtx_MEM (mode, op);
34833 /* op at this point has just BITS_PER_UNIT MEM_ALIGN
34834 on it. Try to improve it using get_pointer_alignment,
34835 and if the special builtin is one that requires strict
34836 mode alignment, also from its GET_MODE_ALIGNMENT.
34837 Failure to do so could lead to ix86_legitimate_combined_insn
34838 rejecting all changes to such insns. */
34839 unsigned int align = get_pointer_alignment (arg);
34840 if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
34841 align = GET_MODE_ALIGNMENT (mode);
34842 if (MEM_ALIGN (op) < align)
34843 set_mem_align (op, align);
34845 else
34847 /* This must be a register. */
34848 if (VECTOR_MODE_P (mode))
34849 op = safe_vector_operand (op, mode);
34851 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34852 op = copy_to_mode_reg (mode, op);
34853 else
34855 op = copy_to_reg (op);
34856 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
34861 args[i].op = op;
34862 args[i].mode = mode;
34865 switch (nargs)
34867 case 0:
34868 pat = GEN_FCN (icode) (target);
34869 break;
34870 case 1:
34871 pat = GEN_FCN (icode) (target, args[0].op);
34872 break;
34873 case 2:
34874 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
34875 break;
34876 case 3:
34877 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
34878 break;
34879 default:
34880 gcc_unreachable ();
34883 if (! pat)
34884 return 0;
34885 emit_insn (pat);
34886 return klass == store ? 0 : target;
34889 /* Return the integer constant in ARG. Constrain it to be in the range
34890 of the subparts of VEC_TYPE; issue an error if not. */
34892 static int
34893 get_element_number (tree vec_type, tree arg)
34895 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
34897 if (!tree_fits_uhwi_p (arg)
34898 || (elt = tree_to_uhwi (arg), elt > max))
34900 error ("selector must be an integer constant in the range 0..%wi", max);
34901 return 0;
34904 return elt;
34907 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
34908 ix86_expand_vector_init. We DO have language-level syntax for this, in
34909 the form of (type){ init-list }. Except that since we can't place emms
34910 instructions from inside the compiler, we can't allow the use of MMX
34911 registers unless the user explicitly asks for it. So we do *not* define
34912 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
34913 we have builtins invoked by mmintrin.h that gives us license to emit
34914 these sorts of instructions. */
34916 static rtx
34917 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
34919 enum machine_mode tmode = TYPE_MODE (type);
34920 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
34921 int i, n_elt = GET_MODE_NUNITS (tmode);
34922 rtvec v = rtvec_alloc (n_elt);
34924 gcc_assert (VECTOR_MODE_P (tmode));
34925 gcc_assert (call_expr_nargs (exp) == n_elt);
34927 for (i = 0; i < n_elt; ++i)
34929 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
34930 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
34933 if (!target || !register_operand (target, tmode))
34934 target = gen_reg_rtx (tmode);
34936 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
34937 return target;
34940 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
34941 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
34942 had a language-level syntax for referencing vector elements. */
34944 static rtx
34945 ix86_expand_vec_ext_builtin (tree exp, rtx target)
34947 enum machine_mode tmode, mode0;
34948 tree arg0, arg1;
34949 int elt;
34950 rtx op0;
34952 arg0 = CALL_EXPR_ARG (exp, 0);
34953 arg1 = CALL_EXPR_ARG (exp, 1);
34955 op0 = expand_normal (arg0);
34956 elt = get_element_number (TREE_TYPE (arg0), arg1);
34958 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
34959 mode0 = TYPE_MODE (TREE_TYPE (arg0));
34960 gcc_assert (VECTOR_MODE_P (mode0));
34962 op0 = force_reg (mode0, op0);
34964 if (optimize || !target || !register_operand (target, tmode))
34965 target = gen_reg_rtx (tmode);
34967 ix86_expand_vector_extract (true, target, op0, elt);
34969 return target;
34972 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
34973 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
34974 a language-level syntax for referencing vector elements. */
34976 static rtx
34977 ix86_expand_vec_set_builtin (tree exp)
34979 enum machine_mode tmode, mode1;
34980 tree arg0, arg1, arg2;
34981 int elt;
34982 rtx op0, op1, target;
34984 arg0 = CALL_EXPR_ARG (exp, 0);
34985 arg1 = CALL_EXPR_ARG (exp, 1);
34986 arg2 = CALL_EXPR_ARG (exp, 2);
34988 tmode = TYPE_MODE (TREE_TYPE (arg0));
34989 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
34990 gcc_assert (VECTOR_MODE_P (tmode));
34992 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
34993 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
34994 elt = get_element_number (TREE_TYPE (arg0), arg2);
34996 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
34997 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
34999 op0 = force_reg (tmode, op0);
35000 op1 = force_reg (mode1, op1);
35002 /* OP0 is the source of these builtin functions and shouldn't be
35003 modified. Create a copy, use it and return it as target. */
35004 target = gen_reg_rtx (tmode);
35005 emit_move_insn (target, op0);
35006 ix86_expand_vector_set (true, target, op1, elt);
35008 return target;
35011 /* Expand an expression EXP that calls a built-in function,
35012 with result going to TARGET if that's convenient
35013 (and in mode MODE if that's convenient).
35014 SUBTARGET may be used as the target for computing one of EXP's operands.
35015 IGNORE is nonzero if the value is to be ignored. */
35017 static rtx
35018 ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
35019 enum machine_mode mode, int ignore)
35021 const struct builtin_description *d;
35022 size_t i;
35023 enum insn_code icode;
35024 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
35025 tree arg0, arg1, arg2, arg3, arg4;
35026 rtx op0, op1, op2, op3, op4, pat, insn;
35027 enum machine_mode mode0, mode1, mode2, mode3, mode4;
35028 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
35030 /* For CPU builtins that can be folded, fold first and expand the fold. */
35031 switch (fcode)
35033 case IX86_BUILTIN_CPU_INIT:
35035 /* Make it call __cpu_indicator_init in libgcc. */
35036 tree call_expr, fndecl, type;
35037 type = build_function_type_list (integer_type_node, NULL_TREE);
35038 fndecl = build_fn_decl ("__cpu_indicator_init", type);
35039 call_expr = build_call_expr (fndecl, 0);
35040 return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
35042 case IX86_BUILTIN_CPU_IS:
35043 case IX86_BUILTIN_CPU_SUPPORTS:
35045 tree arg0 = CALL_EXPR_ARG (exp, 0);
35046 tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
35047 gcc_assert (fold_expr != NULL_TREE);
35048 return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
35052 /* Determine whether the builtin function is available under the current ISA.
35053 Originally the builtin was not created if it wasn't applicable to the
35054 current ISA based on the command line switches. With function specific
35055 options, we need to check in the context of the function making the call
35056 whether it is supported. */
35057 if (ix86_builtins_isa[fcode].isa
35058 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
35060 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
35061 NULL, (enum fpmath_unit) 0, false);
35063 if (!opts)
35064 error ("%qE needs unknown isa option", fndecl);
35065 else
35067 gcc_assert (opts != NULL);
35068 error ("%qE needs isa option %s", fndecl, opts);
35069 free (opts);
35071 return const0_rtx;
35074 switch (fcode)
35076 case IX86_BUILTIN_MASKMOVQ:
35077 case IX86_BUILTIN_MASKMOVDQU:
35078 icode = (fcode == IX86_BUILTIN_MASKMOVQ
35079 ? CODE_FOR_mmx_maskmovq
35080 : CODE_FOR_sse2_maskmovdqu);
35081 /* Note the arg order is different from the operand order. */
35082 arg1 = CALL_EXPR_ARG (exp, 0);
35083 arg2 = CALL_EXPR_ARG (exp, 1);
35084 arg0 = CALL_EXPR_ARG (exp, 2);
35085 op0 = expand_normal (arg0);
35086 op1 = expand_normal (arg1);
35087 op2 = expand_normal (arg2);
35088 mode0 = insn_data[icode].operand[0].mode;
35089 mode1 = insn_data[icode].operand[1].mode;
35090 mode2 = insn_data[icode].operand[2].mode;
35092 op0 = ix86_zero_extend_to_Pmode (op0);
35093 op0 = gen_rtx_MEM (mode1, op0);
35095 if (!insn_data[icode].operand[0].predicate (op0, mode0))
35096 op0 = copy_to_mode_reg (mode0, op0);
35097 if (!insn_data[icode].operand[1].predicate (op1, mode1))
35098 op1 = copy_to_mode_reg (mode1, op1);
35099 if (!insn_data[icode].operand[2].predicate (op2, mode2))
35100 op2 = copy_to_mode_reg (mode2, op2);
35101 pat = GEN_FCN (icode) (op0, op1, op2);
35102 if (! pat)
35103 return 0;
35104 emit_insn (pat);
35105 return 0;
35107 case IX86_BUILTIN_LDMXCSR:
35108 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
35109 target = assign_386_stack_local (SImode, SLOT_TEMP);
35110 emit_move_insn (target, op0);
35111 emit_insn (gen_sse_ldmxcsr (target));
35112 return 0;
35114 case IX86_BUILTIN_STMXCSR:
35115 target = assign_386_stack_local (SImode, SLOT_TEMP);
35116 emit_insn (gen_sse_stmxcsr (target));
35117 return copy_to_mode_reg (SImode, target);
35119 case IX86_BUILTIN_CLFLUSH:
35120 arg0 = CALL_EXPR_ARG (exp, 0);
35121 op0 = expand_normal (arg0);
35122 icode = CODE_FOR_sse2_clflush;
35123 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35124 op0 = ix86_zero_extend_to_Pmode (op0);
35126 emit_insn (gen_sse2_clflush (op0));
35127 return 0;
35129 case IX86_BUILTIN_CLFLUSHOPT:
35130 arg0 = CALL_EXPR_ARG (exp, 0);
35131 op0 = expand_normal (arg0);
35132 icode = CODE_FOR_clflushopt;
35133 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35134 op0 = ix86_zero_extend_to_Pmode (op0);
35136 emit_insn (gen_clflushopt (op0));
35137 return 0;
35139 case IX86_BUILTIN_MONITOR:
35140 arg0 = CALL_EXPR_ARG (exp, 0);
35141 arg1 = CALL_EXPR_ARG (exp, 1);
35142 arg2 = CALL_EXPR_ARG (exp, 2);
35143 op0 = expand_normal (arg0);
35144 op1 = expand_normal (arg1);
35145 op2 = expand_normal (arg2);
35146 if (!REG_P (op0))
35147 op0 = ix86_zero_extend_to_Pmode (op0);
35148 if (!REG_P (op1))
35149 op1 = copy_to_mode_reg (SImode, op1);
35150 if (!REG_P (op2))
35151 op2 = copy_to_mode_reg (SImode, op2);
35152 emit_insn (ix86_gen_monitor (op0, op1, op2));
35153 return 0;
35155 case IX86_BUILTIN_MWAIT:
35156 arg0 = CALL_EXPR_ARG (exp, 0);
35157 arg1 = CALL_EXPR_ARG (exp, 1);
35158 op0 = expand_normal (arg0);
35159 op1 = expand_normal (arg1);
35160 if (!REG_P (op0))
35161 op0 = copy_to_mode_reg (SImode, op0);
35162 if (!REG_P (op1))
35163 op1 = copy_to_mode_reg (SImode, op1);
35164 emit_insn (gen_sse3_mwait (op0, op1));
35165 return 0;
35167 case IX86_BUILTIN_VEC_INIT_V2SI:
35168 case IX86_BUILTIN_VEC_INIT_V4HI:
35169 case IX86_BUILTIN_VEC_INIT_V8QI:
35170 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
35172 case IX86_BUILTIN_VEC_EXT_V2DF:
35173 case IX86_BUILTIN_VEC_EXT_V2DI:
35174 case IX86_BUILTIN_VEC_EXT_V4SF:
35175 case IX86_BUILTIN_VEC_EXT_V4SI:
35176 case IX86_BUILTIN_VEC_EXT_V8HI:
35177 case IX86_BUILTIN_VEC_EXT_V2SI:
35178 case IX86_BUILTIN_VEC_EXT_V4HI:
35179 case IX86_BUILTIN_VEC_EXT_V16QI:
35180 return ix86_expand_vec_ext_builtin (exp, target);
35182 case IX86_BUILTIN_VEC_SET_V2DI:
35183 case IX86_BUILTIN_VEC_SET_V4SF:
35184 case IX86_BUILTIN_VEC_SET_V4SI:
35185 case IX86_BUILTIN_VEC_SET_V8HI:
35186 case IX86_BUILTIN_VEC_SET_V4HI:
35187 case IX86_BUILTIN_VEC_SET_V16QI:
35188 return ix86_expand_vec_set_builtin (exp);
35190 case IX86_BUILTIN_INFQ:
35191 case IX86_BUILTIN_HUGE_VALQ:
35193 REAL_VALUE_TYPE inf;
35194 rtx tmp;
35196 real_inf (&inf);
35197 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
35199 tmp = validize_mem (force_const_mem (mode, tmp));
35201 if (target == 0)
35202 target = gen_reg_rtx (mode);
35204 emit_move_insn (target, tmp);
35205 return target;
35208 case IX86_BUILTIN_RDPMC:
35209 case IX86_BUILTIN_RDTSC:
35210 case IX86_BUILTIN_RDTSCP:
35212 op0 = gen_reg_rtx (DImode);
35213 op1 = gen_reg_rtx (DImode);
35215 if (fcode == IX86_BUILTIN_RDPMC)
35217 arg0 = CALL_EXPR_ARG (exp, 0);
35218 op2 = expand_normal (arg0);
35219 if (!register_operand (op2, SImode))
35220 op2 = copy_to_mode_reg (SImode, op2);
35222 insn = (TARGET_64BIT
35223 ? gen_rdpmc_rex64 (op0, op1, op2)
35224 : gen_rdpmc (op0, op2));
35225 emit_insn (insn);
35227 else if (fcode == IX86_BUILTIN_RDTSC)
35229 insn = (TARGET_64BIT
35230 ? gen_rdtsc_rex64 (op0, op1)
35231 : gen_rdtsc (op0));
35232 emit_insn (insn);
35234 else
35236 op2 = gen_reg_rtx (SImode);
35238 insn = (TARGET_64BIT
35239 ? gen_rdtscp_rex64 (op0, op1, op2)
35240 : gen_rdtscp (op0, op2));
35241 emit_insn (insn);
35243 arg0 = CALL_EXPR_ARG (exp, 0);
35244 op4 = expand_normal (arg0);
35245 if (!address_operand (op4, VOIDmode))
35247 op4 = convert_memory_address (Pmode, op4);
35248 op4 = copy_addr_to_reg (op4);
35250 emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
35253 if (target == 0)
35255 /* mode is VOIDmode if __builtin_rd* has been called
35256 without lhs. */
35257 if (mode == VOIDmode)
35258 return target;
35259 target = gen_reg_rtx (mode);
35262 if (TARGET_64BIT)
35264 op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
35265 op1, 1, OPTAB_DIRECT);
35266 op0 = expand_simple_binop (DImode, IOR, op0, op1,
35267 op0, 1, OPTAB_DIRECT);
35270 emit_move_insn (target, op0);
35271 return target;
35273 case IX86_BUILTIN_FXSAVE:
35274 case IX86_BUILTIN_FXRSTOR:
35275 case IX86_BUILTIN_FXSAVE64:
35276 case IX86_BUILTIN_FXRSTOR64:
35277 case IX86_BUILTIN_FNSTENV:
35278 case IX86_BUILTIN_FLDENV:
35279 case IX86_BUILTIN_FNSTSW:
35280 mode0 = BLKmode;
35281 switch (fcode)
35283 case IX86_BUILTIN_FXSAVE:
35284 icode = CODE_FOR_fxsave;
35285 break;
35286 case IX86_BUILTIN_FXRSTOR:
35287 icode = CODE_FOR_fxrstor;
35288 break;
35289 case IX86_BUILTIN_FXSAVE64:
35290 icode = CODE_FOR_fxsave64;
35291 break;
35292 case IX86_BUILTIN_FXRSTOR64:
35293 icode = CODE_FOR_fxrstor64;
35294 break;
35295 case IX86_BUILTIN_FNSTENV:
35296 icode = CODE_FOR_fnstenv;
35297 break;
35298 case IX86_BUILTIN_FLDENV:
35299 icode = CODE_FOR_fldenv;
35300 break;
35301 case IX86_BUILTIN_FNSTSW:
35302 icode = CODE_FOR_fnstsw;
35303 mode0 = HImode;
35304 break;
35305 default:
35306 gcc_unreachable ();
35309 arg0 = CALL_EXPR_ARG (exp, 0);
35310 op0 = expand_normal (arg0);
35312 if (!address_operand (op0, VOIDmode))
35314 op0 = convert_memory_address (Pmode, op0);
35315 op0 = copy_addr_to_reg (op0);
35317 op0 = gen_rtx_MEM (mode0, op0);
35319 pat = GEN_FCN (icode) (op0);
35320 if (pat)
35321 emit_insn (pat);
35322 return 0;
35324 case IX86_BUILTIN_XSAVE:
35325 case IX86_BUILTIN_XRSTOR:
35326 case IX86_BUILTIN_XSAVE64:
35327 case IX86_BUILTIN_XRSTOR64:
35328 case IX86_BUILTIN_XSAVEOPT:
35329 case IX86_BUILTIN_XSAVEOPT64:
35330 case IX86_BUILTIN_XSAVES:
35331 case IX86_BUILTIN_XRSTORS:
35332 case IX86_BUILTIN_XSAVES64:
35333 case IX86_BUILTIN_XRSTORS64:
35334 case IX86_BUILTIN_XSAVEC:
35335 case IX86_BUILTIN_XSAVEC64:
35336 arg0 = CALL_EXPR_ARG (exp, 0);
35337 arg1 = CALL_EXPR_ARG (exp, 1);
35338 op0 = expand_normal (arg0);
35339 op1 = expand_normal (arg1);
35341 if (!address_operand (op0, VOIDmode))
35343 op0 = convert_memory_address (Pmode, op0);
35344 op0 = copy_addr_to_reg (op0);
35346 op0 = gen_rtx_MEM (BLKmode, op0);
35348 op1 = force_reg (DImode, op1);
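/* On 64-bit targets the xsave/xrstor patterns take the feature mask as
   two SImode halves (EDX:EAX), so split the DImode mask first.  */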
35350 if (TARGET_64BIT)
35352 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
35353 NULL, 1, OPTAB_DIRECT);
35354 switch (fcode)
35356 case IX86_BUILTIN_XSAVE:
35357 icode = CODE_FOR_xsave_rex64;
35358 break;
35359 case IX86_BUILTIN_XRSTOR:
35360 icode = CODE_FOR_xrstor_rex64;
35361 break;
35362 case IX86_BUILTIN_XSAVE64:
35363 icode = CODE_FOR_xsave64;
35364 break;
35365 case IX86_BUILTIN_XRSTOR64:
35366 icode = CODE_FOR_xrstor64;
35367 break;
35368 case IX86_BUILTIN_XSAVEOPT:
35369 icode = CODE_FOR_xsaveopt_rex64;
35370 break;
35371 case IX86_BUILTIN_XSAVEOPT64:
35372 icode = CODE_FOR_xsaveopt64;
35373 break;
35374 case IX86_BUILTIN_XSAVES:
35375 icode = CODE_FOR_xsaves_rex64;
35376 break;
35377 case IX86_BUILTIN_XRSTORS:
35378 icode = CODE_FOR_xrstors_rex64;
35379 break;
35380 case IX86_BUILTIN_XSAVES64:
35381 icode = CODE_FOR_xsaves64;
35382 break;
35383 case IX86_BUILTIN_XRSTORS64:
35384 icode = CODE_FOR_xrstors64;
35385 break;
35386 case IX86_BUILTIN_XSAVEC:
35387 icode = CODE_FOR_xsavec_rex64;
35388 break;
35389 case IX86_BUILTIN_XSAVEC64:
35390 icode = CODE_FOR_xsavec64;
35391 break;
35392 default:
35393 gcc_unreachable ();
35396 op2 = gen_lowpart (SImode, op2);
35397 op1 = gen_lowpart (SImode, op1);
35398 pat = GEN_FCN (icode) (op0, op1, op2);
35400 else
35402 switch (fcode)
35404 case IX86_BUILTIN_XSAVE:
35405 icode = CODE_FOR_xsave;
35406 break;
35407 case IX86_BUILTIN_XRSTOR:
35408 icode = CODE_FOR_xrstor;
35409 break;
35410 case IX86_BUILTIN_XSAVEOPT:
35411 icode = CODE_FOR_xsaveopt;
35412 break;
35413 case IX86_BUILTIN_XSAVES:
35414 icode = CODE_FOR_xsaves;
35415 break;
35416 case IX86_BUILTIN_XRSTORS:
35417 icode = CODE_FOR_xrstors;
35418 break;
35419 case IX86_BUILTIN_XSAVEC:
35420 icode = CODE_FOR_xsavec;
35421 break;
35422 default:
35423 gcc_unreachable ();
35425 pat = GEN_FCN (icode) (op0, op1);
35428 if (pat)
35429 emit_insn (pat);
35430 return 0;
35432 case IX86_BUILTIN_LLWPCB:
35433 arg0 = CALL_EXPR_ARG (exp, 0);
35434 op0 = expand_normal (arg0);
35435 icode = CODE_FOR_lwp_llwpcb;
35436 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35437 op0 = ix86_zero_extend_to_Pmode (op0);
35438 emit_insn (gen_lwp_llwpcb (op0));
35439 return 0;
35441 case IX86_BUILTIN_SLWPCB:
35442 icode = CODE_FOR_lwp_slwpcb;
35443 if (!target
35444 || !insn_data[icode].operand[0].predicate (target, Pmode))
35445 target = gen_reg_rtx (Pmode);
35446 emit_insn (gen_lwp_slwpcb (target));
35447 return target;
35449 case IX86_BUILTIN_BEXTRI32:
35450 case IX86_BUILTIN_BEXTRI64:
35451 arg0 = CALL_EXPR_ARG (exp, 0);
35452 arg1 = CALL_EXPR_ARG (exp, 1);
35453 op0 = expand_normal (arg0);
35454 op1 = expand_normal (arg1);
35455 icode = (fcode == IX86_BUILTIN_BEXTRI32
35456 ? CODE_FOR_tbm_bextri_si
35457 : CODE_FOR_tbm_bextri_di);
35458 if (!CONST_INT_P (op1))
35460 error ("last argument must be an immediate");
35461 return const0_rtx;
35463 else
35465 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
35466 unsigned char lsb_index = INTVAL (op1) & 0xFF;
35467 op1 = GEN_INT (length);
35468 op2 = GEN_INT (lsb_index);
35469 pat = GEN_FCN (icode) (target, op0, op1, op2);
35470 if (pat)
35471 emit_insn (pat);
35472 return target;
35475 case IX86_BUILTIN_RDRAND16_STEP:
35476 icode = CODE_FOR_rdrandhi_1;
35477 mode0 = HImode;
35478 goto rdrand_step;
35480 case IX86_BUILTIN_RDRAND32_STEP:
35481 icode = CODE_FOR_rdrandsi_1;
35482 mode0 = SImode;
35483 goto rdrand_step;
35485 case IX86_BUILTIN_RDRAND64_STEP:
35486 icode = CODE_FOR_rdranddi_1;
35487 mode0 = DImode;
35489 rdrand_step:
35490 op0 = gen_reg_rtx (mode0);
35491 emit_insn (GEN_FCN (icode) (op0));
35493 arg0 = CALL_EXPR_ARG (exp, 0);
35494 op1 = expand_normal (arg0);
35495 if (!address_operand (op1, VOIDmode))
35497 op1 = convert_memory_address (Pmode, op1);
35498 op1 = copy_addr_to_reg (op1);
35500 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
35502 op1 = gen_reg_rtx (SImode);
35503 emit_move_insn (op1, CONST1_RTX (SImode));
35505 /* Emit SImode conditional move. */
35506 if (mode0 == HImode)
35508 op2 = gen_reg_rtx (SImode);
35509 emit_insn (gen_zero_extendhisi2 (op2, op0));
35511 else if (mode0 == SImode)
35512 op2 = op0;
35513 else
35514 op2 = gen_rtx_SUBREG (SImode, op0, 0);
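/* On failure rdrand clears both CF and the destination register, so
   selecting that (zeroed) result when CF is clear and the constant 1
   otherwise yields the 0/1 success value the intrinsic returns.  */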
35516 if (target == 0
35517 || !register_operand (target, SImode))
35518 target = gen_reg_rtx (SImode);
35520 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
35521 const0_rtx);
35522 emit_insn (gen_rtx_SET (VOIDmode, target,
35523 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
35524 return target;
35526 case IX86_BUILTIN_RDSEED16_STEP:
35527 icode = CODE_FOR_rdseedhi_1;
35528 mode0 = HImode;
35529 goto rdseed_step;
35531 case IX86_BUILTIN_RDSEED32_STEP:
35532 icode = CODE_FOR_rdseedsi_1;
35533 mode0 = SImode;
35534 goto rdseed_step;
35536 case IX86_BUILTIN_RDSEED64_STEP:
35537 icode = CODE_FOR_rdseeddi_1;
35538 mode0 = DImode;
35540 rdseed_step:
35541 op0 = gen_reg_rtx (mode0);
35542 emit_insn (GEN_FCN (icode) (op0));
35544 arg0 = CALL_EXPR_ARG (exp, 0);
35545 op1 = expand_normal (arg0);
35546 if (!address_operand (op1, VOIDmode))
35548 op1 = convert_memory_address (Pmode, op1);
35549 op1 = copy_addr_to_reg (op1);
35551 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
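/* rdseed sets the carry flag on success; capture CF as a QImode 0/1
   value and zero-extend it to form the int return value.  */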
35553 op2 = gen_reg_rtx (QImode);
35555 pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
35556 const0_rtx);
35557 emit_insn (gen_rtx_SET (VOIDmode, op2, pat));
35559 if (target == 0
35560 || !register_operand (target, SImode))
35561 target = gen_reg_rtx (SImode);
35563 emit_insn (gen_zero_extendqisi2 (target, op2));
35564 return target;
35566 case IX86_BUILTIN_ADDCARRYX32:
35567 icode = TARGET_ADX ? CODE_FOR_adcxsi3 : CODE_FOR_addsi3_carry;
35568 mode0 = SImode;
35569 goto addcarryx;
35571 case IX86_BUILTIN_ADDCARRYX64:
35572 icode = TARGET_ADX ? CODE_FOR_adcxdi3 : CODE_FOR_adddi3_carry;
35573 mode0 = DImode;
35575 addcarryx:
35576 arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */
35577 arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */
35578 arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */
35579 arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */
35581 op0 = gen_reg_rtx (QImode);
35583 /* Generate CF from input operand. */
35584 op1 = expand_normal (arg0);
35585 op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
35586 emit_insn (gen_addqi3_cc (op0, op1, constm1_rtx));
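/* Adding 0xff (-1) to the byte C_IN carries out of eight bits exactly
   when C_IN is nonzero, which loads CF for the add-with-carry below.  */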
35588 /* Gen ADCX instruction to compute X+Y+CF. */
35589 op2 = expand_normal (arg1);
35590 op3 = expand_normal (arg2);
35592 if (!REG_P (op2))
35593 op2 = copy_to_mode_reg (mode0, op2);
35594 if (!REG_P (op3))
35595 op3 = copy_to_mode_reg (mode0, op3);
35597 op0 = gen_reg_rtx (mode0);
35599 op4 = gen_rtx_REG (CCCmode, FLAGS_REG);
35600 pat = gen_rtx_LTU (VOIDmode, op4, const0_rtx);
35601 emit_insn (GEN_FCN (icode) (op0, op2, op3, op4, pat));
35603 /* Store the result. */
35604 op4 = expand_normal (arg3);
35605 if (!address_operand (op4, VOIDmode))
35607 op4 = convert_memory_address (Pmode, op4);
35608 op4 = copy_addr_to_reg (op4);
35610 emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
35612 /* Return current CF value. */
35613 if (target == 0)
35614 target = gen_reg_rtx (QImode);
35616 PUT_MODE (pat, QImode);
35617 emit_insn (gen_rtx_SET (VOIDmode, target, pat));
35618 return target;
35620 case IX86_BUILTIN_READ_FLAGS:
35621 emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG)));
35623 if (optimize
35624 || target == NULL_RTX
35625 || !nonimmediate_operand (target, word_mode)
35626 || GET_MODE (target) != word_mode)
35627 target = gen_reg_rtx (word_mode);
35629 emit_insn (gen_pop (target));
35630 return target;
35632 case IX86_BUILTIN_WRITE_FLAGS:
35634 arg0 = CALL_EXPR_ARG (exp, 0);
35635 op0 = expand_normal (arg0);
35636 if (!general_no_elim_operand (op0, word_mode))
35637 op0 = copy_to_mode_reg (word_mode, op0);
35639 emit_insn (gen_push (op0));
35640 emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG)));
35641 return 0;
35643 case IX86_BUILTIN_KORTESTC16:
35644 icode = CODE_FOR_kortestchi;
35645 mode0 = HImode;
35646 mode1 = CCCmode;
35647 goto kortest;
35649 case IX86_BUILTIN_KORTESTZ16:
35650 icode = CODE_FOR_kortestzhi;
35651 mode0 = HImode;
35652 mode1 = CCZmode;
35654 kortest:
35655 arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */
35656 arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. */
35657 op0 = expand_normal (arg0);
35658 op1 = expand_normal (arg1);
35660 op0 = copy_to_reg (op0);
35661 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
35662 op1 = copy_to_reg (op1);
35663 op1 = simplify_gen_subreg (mode0, op1, GET_MODE (op1), 0);
35665 target = gen_reg_rtx (QImode);
35666 emit_insn (gen_rtx_SET (mode0, target, const0_rtx));
35668 /* Emit kortest. */
35669 emit_insn (GEN_FCN (icode) (op0, op1));
35670 /* And use setcc to return result from flags. */
35671 ix86_expand_setcc (target, EQ,
35672 gen_rtx_REG (mode1, FLAGS_REG), const0_rtx);
35673 return target;
35675 case IX86_BUILTIN_GATHERSIV2DF:
35676 icode = CODE_FOR_avx2_gathersiv2df;
35677 goto gather_gen;
35678 case IX86_BUILTIN_GATHERSIV4DF:
35679 icode = CODE_FOR_avx2_gathersiv4df;
35680 goto gather_gen;
35681 case IX86_BUILTIN_GATHERDIV2DF:
35682 icode = CODE_FOR_avx2_gatherdiv2df;
35683 goto gather_gen;
35684 case IX86_BUILTIN_GATHERDIV4DF:
35685 icode = CODE_FOR_avx2_gatherdiv4df;
35686 goto gather_gen;
35687 case IX86_BUILTIN_GATHERSIV4SF:
35688 icode = CODE_FOR_avx2_gathersiv4sf;
35689 goto gather_gen;
35690 case IX86_BUILTIN_GATHERSIV8SF:
35691 icode = CODE_FOR_avx2_gathersiv8sf;
35692 goto gather_gen;
35693 case IX86_BUILTIN_GATHERDIV4SF:
35694 icode = CODE_FOR_avx2_gatherdiv4sf;
35695 goto gather_gen;
35696 case IX86_BUILTIN_GATHERDIV8SF:
35697 icode = CODE_FOR_avx2_gatherdiv8sf;
35698 goto gather_gen;
35699 case IX86_BUILTIN_GATHERSIV2DI:
35700 icode = CODE_FOR_avx2_gathersiv2di;
35701 goto gather_gen;
35702 case IX86_BUILTIN_GATHERSIV4DI:
35703 icode = CODE_FOR_avx2_gathersiv4di;
35704 goto gather_gen;
35705 case IX86_BUILTIN_GATHERDIV2DI:
35706 icode = CODE_FOR_avx2_gatherdiv2di;
35707 goto gather_gen;
35708 case IX86_BUILTIN_GATHERDIV4DI:
35709 icode = CODE_FOR_avx2_gatherdiv4di;
35710 goto gather_gen;
35711 case IX86_BUILTIN_GATHERSIV4SI:
35712 icode = CODE_FOR_avx2_gathersiv4si;
35713 goto gather_gen;
35714 case IX86_BUILTIN_GATHERSIV8SI:
35715 icode = CODE_FOR_avx2_gathersiv8si;
35716 goto gather_gen;
35717 case IX86_BUILTIN_GATHERDIV4SI:
35718 icode = CODE_FOR_avx2_gatherdiv4si;
35719 goto gather_gen;
35720 case IX86_BUILTIN_GATHERDIV8SI:
35721 icode = CODE_FOR_avx2_gatherdiv8si;
35722 goto gather_gen;
35723 case IX86_BUILTIN_GATHERALTSIV4DF:
35724 icode = CODE_FOR_avx2_gathersiv4df;
35725 goto gather_gen;
35726 case IX86_BUILTIN_GATHERALTDIV8SF:
35727 icode = CODE_FOR_avx2_gatherdiv8sf;
35728 goto gather_gen;
35729 case IX86_BUILTIN_GATHERALTSIV4DI:
35730 icode = CODE_FOR_avx2_gathersiv4di;
35731 goto gather_gen;
35732 case IX86_BUILTIN_GATHERALTDIV8SI:
35733 icode = CODE_FOR_avx2_gatherdiv8si;
35734 goto gather_gen;
35735 case IX86_BUILTIN_GATHER3SIV16SF:
35736 icode = CODE_FOR_avx512f_gathersiv16sf;
35737 goto gather_gen;
35738 case IX86_BUILTIN_GATHER3SIV8DF:
35739 icode = CODE_FOR_avx512f_gathersiv8df;
35740 goto gather_gen;
35741 case IX86_BUILTIN_GATHER3DIV16SF:
35742 icode = CODE_FOR_avx512f_gatherdiv16sf;
35743 goto gather_gen;
35744 case IX86_BUILTIN_GATHER3DIV8DF:
35745 icode = CODE_FOR_avx512f_gatherdiv8df;
35746 goto gather_gen;
35747 case IX86_BUILTIN_GATHER3SIV16SI:
35748 icode = CODE_FOR_avx512f_gathersiv16si;
35749 goto gather_gen;
35750 case IX86_BUILTIN_GATHER3SIV8DI:
35751 icode = CODE_FOR_avx512f_gathersiv8di;
35752 goto gather_gen;
35753 case IX86_BUILTIN_GATHER3DIV16SI:
35754 icode = CODE_FOR_avx512f_gatherdiv16si;
35755 goto gather_gen;
35756 case IX86_BUILTIN_GATHER3DIV8DI:
35757 icode = CODE_FOR_avx512f_gatherdiv8di;
35758 goto gather_gen;
35759 case IX86_BUILTIN_GATHER3ALTSIV8DF:
35760 icode = CODE_FOR_avx512f_gathersiv8df;
35761 goto gather_gen;
35762 case IX86_BUILTIN_GATHER3ALTDIV16SF:
35763 icode = CODE_FOR_avx512f_gatherdiv16sf;
35764 goto gather_gen;
35765 case IX86_BUILTIN_GATHER3ALTSIV8DI:
35766 icode = CODE_FOR_avx512f_gathersiv8di;
35767 goto gather_gen;
35768 case IX86_BUILTIN_GATHER3ALTDIV16SI:
35769 icode = CODE_FOR_avx512f_gatherdiv16si;
35770 goto gather_gen;
35771 case IX86_BUILTIN_SCATTERSIV16SF:
35772 icode = CODE_FOR_avx512f_scattersiv16sf;
35773 goto scatter_gen;
35774 case IX86_BUILTIN_SCATTERSIV8DF:
35775 icode = CODE_FOR_avx512f_scattersiv8df;
35776 goto scatter_gen;
35777 case IX86_BUILTIN_SCATTERDIV16SF:
35778 icode = CODE_FOR_avx512f_scatterdiv16sf;
35779 goto scatter_gen;
35780 case IX86_BUILTIN_SCATTERDIV8DF:
35781 icode = CODE_FOR_avx512f_scatterdiv8df;
35782 goto scatter_gen;
35783 case IX86_BUILTIN_SCATTERSIV16SI:
35784 icode = CODE_FOR_avx512f_scattersiv16si;
35785 goto scatter_gen;
35786 case IX86_BUILTIN_SCATTERSIV8DI:
35787 icode = CODE_FOR_avx512f_scattersiv8di;
35788 goto scatter_gen;
35789 case IX86_BUILTIN_SCATTERDIV16SI:
35790 icode = CODE_FOR_avx512f_scatterdiv16si;
35791 goto scatter_gen;
35792 case IX86_BUILTIN_SCATTERDIV8DI:
35793 icode = CODE_FOR_avx512f_scatterdiv8di;
35794 goto scatter_gen;
35796 case IX86_BUILTIN_GATHERPFDPD:
35797 icode = CODE_FOR_avx512pf_gatherpfv8sidf;
35798 goto vec_prefetch_gen;
35799 case IX86_BUILTIN_GATHERPFDPS:
35800 icode = CODE_FOR_avx512pf_gatherpfv16sisf;
35801 goto vec_prefetch_gen;
35802 case IX86_BUILTIN_GATHERPFQPD:
35803 icode = CODE_FOR_avx512pf_gatherpfv8didf;
35804 goto vec_prefetch_gen;
35805 case IX86_BUILTIN_GATHERPFQPS:
35806 icode = CODE_FOR_avx512pf_gatherpfv8disf;
35807 goto vec_prefetch_gen;
35808 case IX86_BUILTIN_SCATTERPFDPD:
35809 icode = CODE_FOR_avx512pf_scatterpfv8sidf;
35810 goto vec_prefetch_gen;
35811 case IX86_BUILTIN_SCATTERPFDPS:
35812 icode = CODE_FOR_avx512pf_scatterpfv16sisf;
35813 goto vec_prefetch_gen;
35814 case IX86_BUILTIN_SCATTERPFQPD:
35815 icode = CODE_FOR_avx512pf_scatterpfv8didf;
35816 goto vec_prefetch_gen;
35817 case IX86_BUILTIN_SCATTERPFQPS:
35818 icode = CODE_FOR_avx512pf_scatterpfv8disf;
35819 goto vec_prefetch_gen;
35821 gather_gen:
35822 rtx half;
35823 rtx (*gen) (rtx, rtx);
35825 arg0 = CALL_EXPR_ARG (exp, 0);
35826 arg1 = CALL_EXPR_ARG (exp, 1);
35827 arg2 = CALL_EXPR_ARG (exp, 2);
35828 arg3 = CALL_EXPR_ARG (exp, 3);
35829 arg4 = CALL_EXPR_ARG (exp, 4);
35830 op0 = expand_normal (arg0);
35831 op1 = expand_normal (arg1);
35832 op2 = expand_normal (arg2);
35833 op3 = expand_normal (arg3);
35834 op4 = expand_normal (arg4);
35835 /* Note the arg order is different from the operand order. */
35836 mode0 = insn_data[icode].operand[1].mode;
35837 mode2 = insn_data[icode].operand[3].mode;
35838 mode3 = insn_data[icode].operand[4].mode;
35839 mode4 = insn_data[icode].operand[5].mode;
35841 if (target == NULL_RTX
35842 || GET_MODE (target) != insn_data[icode].operand[0].mode
35843 || !insn_data[icode].operand[0].predicate (target,
35844 GET_MODE (target)))
35845 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
35846 else
35847 subtarget = target;
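/* The ...ALT... gather variants pair vectors whose element counts
   differ; extract the low half of the wider operand (the index for the
   SIV forms, the source and mask for the DIV forms) so the regular
   gather patterns can be used.  */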
35849 switch (fcode)
35851 case IX86_BUILTIN_GATHER3ALTSIV8DF:
35852 case IX86_BUILTIN_GATHER3ALTSIV8DI:
35853 half = gen_reg_rtx (V8SImode);
35854 if (!nonimmediate_operand (op2, V16SImode))
35855 op2 = copy_to_mode_reg (V16SImode, op2);
35856 emit_insn (gen_vec_extract_lo_v16si (half, op2));
35857 op2 = half;
35858 break;
35859 case IX86_BUILTIN_GATHERALTSIV4DF:
35860 case IX86_BUILTIN_GATHERALTSIV4DI:
35861 half = gen_reg_rtx (V4SImode);
35862 if (!nonimmediate_operand (op2, V8SImode))
35863 op2 = copy_to_mode_reg (V8SImode, op2);
35864 emit_insn (gen_vec_extract_lo_v8si (half, op2));
35865 op2 = half;
35866 break;
35867 case IX86_BUILTIN_GATHER3ALTDIV16SF:
35868 case IX86_BUILTIN_GATHER3ALTDIV16SI:
35869 half = gen_reg_rtx (mode0);
35870 if (mode0 == V8SFmode)
35871 gen = gen_vec_extract_lo_v16sf;
35872 else
35873 gen = gen_vec_extract_lo_v16si;
35874 if (!nonimmediate_operand (op0, GET_MODE (op0)))
35875 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
35876 emit_insn (gen (half, op0));
35877 op0 = half;
35878 if (GET_MODE (op3) != VOIDmode)
35880 if (!nonimmediate_operand (op3, GET_MODE (op3)))
35881 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
35882 emit_insn (gen (half, op3));
35883 op3 = half;
35885 break;
35886 case IX86_BUILTIN_GATHERALTDIV8SF:
35887 case IX86_BUILTIN_GATHERALTDIV8SI:
35888 half = gen_reg_rtx (mode0);
35889 if (mode0 == V4SFmode)
35890 gen = gen_vec_extract_lo_v8sf;
35891 else
35892 gen = gen_vec_extract_lo_v8si;
35893 if (!nonimmediate_operand (op0, GET_MODE (op0)))
35894 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
35895 emit_insn (gen (half, op0));
35896 op0 = half;
35897 if (GET_MODE (op3) != VOIDmode)
35899 if (!nonimmediate_operand (op3, GET_MODE (op3)))
35900 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
35901 emit_insn (gen (half, op3));
35902 op3 = half;
35904 break;
35905 default:
35906 break;
35909 /* Force the memory operand to be addressed through a base register
35910 here; we don't want to do that for the memory operands of other
35911 builtin functions. */
35912 op1 = ix86_zero_extend_to_Pmode (op1);
35914 if (!insn_data[icode].operand[1].predicate (op0, mode0))
35915 op0 = copy_to_mode_reg (mode0, op0);
35916 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
35917 op1 = copy_to_mode_reg (Pmode, op1);
35918 if (!insn_data[icode].operand[3].predicate (op2, mode2))
35919 op2 = copy_to_mode_reg (mode2, op2);
35920 if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode)
35922 if (!insn_data[icode].operand[4].predicate (op3, mode3))
35923 op3 = copy_to_mode_reg (mode3, op3);
35925 else
35927 op3 = copy_to_reg (op3);
35928 op3 = simplify_gen_subreg (mode3, op3, GET_MODE (op3), 0);
35930 if (!insn_data[icode].operand[5].predicate (op4, mode4))
35932 error ("the last argument must be scale 1, 2, 4, 8");
35933 return const0_rtx;
35936 /* Optimize. If mask is known to have all high bits set,
35937 replace op0 with pc_rtx to signal that the instruction
35938 overwrites the whole destination and doesn't use its
35939 previous contents. */
35940 if (optimize)
35942 if (TREE_CODE (arg3) == INTEGER_CST)
35944 if (integer_all_onesp (arg3))
35945 op0 = pc_rtx;
35947 else if (TREE_CODE (arg3) == VECTOR_CST)
35949 unsigned int negative = 0;
35950 for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
35952 tree cst = VECTOR_CST_ELT (arg3, i);
35953 if (TREE_CODE (cst) == INTEGER_CST
35954 && tree_int_cst_sign_bit (cst))
35955 negative++;
35956 else if (TREE_CODE (cst) == REAL_CST
35957 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
35958 negative++;
35960 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
35961 op0 = pc_rtx;
35963 else if (TREE_CODE (arg3) == SSA_NAME
35964 && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE)
35966 /* Also recognize when the mask is like:
35967 __v2df src = _mm_setzero_pd ();
35968 __v2df mask = _mm_cmpeq_pd (src, src);
35970 __v8sf src = _mm256_setzero_ps ();
35971 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
35972 as that is a cheaper way to load all ones into
35973 a register than having to load a constant from
35974 memory. */
35975 gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
35976 if (is_gimple_call (def_stmt))
35978 tree fndecl = gimple_call_fndecl (def_stmt);
35979 if (fndecl
35980 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
35981 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
35983 case IX86_BUILTIN_CMPPD:
35984 case IX86_BUILTIN_CMPPS:
35985 case IX86_BUILTIN_CMPPD256:
35986 case IX86_BUILTIN_CMPPS256:
35987 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
35988 break;
35989 /* FALLTHRU */
35990 case IX86_BUILTIN_CMPEQPD:
35991 case IX86_BUILTIN_CMPEQPS:
35992 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
35993 && initializer_zerop (gimple_call_arg (def_stmt,
35994 1)))
35995 op0 = pc_rtx;
35996 break;
35997 default:
35998 break;
36004 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
36005 if (! pat)
36006 return const0_rtx;
36007 emit_insn (pat);
36009 switch (fcode)
36011 case IX86_BUILTIN_GATHER3DIV16SF:
36012 if (target == NULL_RTX)
36013 target = gen_reg_rtx (V8SFmode);
36014 emit_insn (gen_vec_extract_lo_v16sf (target, subtarget));
36015 break;
36016 case IX86_BUILTIN_GATHER3DIV16SI:
36017 if (target == NULL_RTX)
36018 target = gen_reg_rtx (V8SImode);
36019 emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
36020 break;
36021 case IX86_BUILTIN_GATHERDIV8SF:
36022 if (target == NULL_RTX)
36023 target = gen_reg_rtx (V4SFmode);
36024 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
36025 break;
36026 case IX86_BUILTIN_GATHERDIV8SI:
36027 if (target == NULL_RTX)
36028 target = gen_reg_rtx (V4SImode);
36029 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
36030 break;
36031 default:
36032 target = subtarget;
36033 break;
36035 return target;
36037 scatter_gen:
36038 arg0 = CALL_EXPR_ARG (exp, 0);
36039 arg1 = CALL_EXPR_ARG (exp, 1);
36040 arg2 = CALL_EXPR_ARG (exp, 2);
36041 arg3 = CALL_EXPR_ARG (exp, 3);
36042 arg4 = CALL_EXPR_ARG (exp, 4);
36043 op0 = expand_normal (arg0);
36044 op1 = expand_normal (arg1);
36045 op2 = expand_normal (arg2);
36046 op3 = expand_normal (arg3);
36047 op4 = expand_normal (arg4);
36048 mode1 = insn_data[icode].operand[1].mode;
36049 mode2 = insn_data[icode].operand[2].mode;
36050 mode3 = insn_data[icode].operand[3].mode;
36051 mode4 = insn_data[icode].operand[4].mode;
36053 /* Force the memory operand to use only a base register here. We
36054 don't want to do this to memory operands of other builtin
36055 functions. */
36056 op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
36058 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
36059 op0 = copy_to_mode_reg (Pmode, op0);
36061 if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
36063 if (!insn_data[icode].operand[1].predicate (op1, mode1))
36064 op1 = copy_to_mode_reg (mode1, op1);
36066 else
36068 op1 = copy_to_reg (op1);
36069 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
36072 if (!insn_data[icode].operand[2].predicate (op2, mode2))
36073 op2 = copy_to_mode_reg (mode2, op2);
36075 if (!insn_data[icode].operand[3].predicate (op3, mode3))
36076 op3 = copy_to_mode_reg (mode3, op3);
36078 if (!insn_data[icode].operand[4].predicate (op4, mode4))
36080 error ("the last argument must be scale 1, 2, 4, 8");
36081 return const0_rtx;
36084 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
36085 if (! pat)
36086 return const0_rtx;
36088 emit_insn (pat);
36089 return 0;
36091 vec_prefetch_gen:
36092 arg0 = CALL_EXPR_ARG (exp, 0);
36093 arg1 = CALL_EXPR_ARG (exp, 1);
36094 arg2 = CALL_EXPR_ARG (exp, 2);
36095 arg3 = CALL_EXPR_ARG (exp, 3);
36096 arg4 = CALL_EXPR_ARG (exp, 4);
36097 op0 = expand_normal (arg0);
36098 op1 = expand_normal (arg1);
36099 op2 = expand_normal (arg2);
36100 op3 = expand_normal (arg3);
36101 op4 = expand_normal (arg4);
36102 mode0 = insn_data[icode].operand[0].mode;
36103 mode1 = insn_data[icode].operand[1].mode;
36104 mode3 = insn_data[icode].operand[3].mode;
36105 mode4 = insn_data[icode].operand[4].mode;
36107 if (GET_MODE (op0) == mode0
36108 || (GET_MODE (op0) == VOIDmode && op0 != constm1_rtx))
36110 if (!insn_data[icode].operand[0].predicate (op0, mode0))
36111 op0 = copy_to_mode_reg (mode0, op0);
36113 else if (op0 != constm1_rtx)
36115 op0 = copy_to_reg (op0);
36116 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
36119 if (!insn_data[icode].operand[1].predicate (op1, mode1))
36120 op1 = copy_to_mode_reg (mode1, op1);
36122 /* Force the memory operand to use only a base register here. We
36123 don't want to do this to memory operands of other builtin
36124 functions. */
36125 op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1));
36127 if (!insn_data[icode].operand[2].predicate (op2, Pmode))
36128 op2 = copy_to_mode_reg (Pmode, op2);
36130 if (!insn_data[icode].operand[3].predicate (op3, mode3))
36132 error ("the fourth argument must be scale 1, 2, 4, 8");
36133 return const0_rtx;
36136 if (!insn_data[icode].operand[4].predicate (op4, mode4))
36138 error ("incorrect hint operand");
36139 return const0_rtx;
36142 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
36143 if (! pat)
36144 return const0_rtx;
36146 emit_insn (pat);
36148 return 0;
36150 case IX86_BUILTIN_XABORT:
36151 icode = CODE_FOR_xabort;
36152 arg0 = CALL_EXPR_ARG (exp, 0);
36153 op0 = expand_normal (arg0);
36154 mode0 = insn_data[icode].operand[0].mode;
36155 if (!insn_data[icode].operand[0].predicate (op0, mode0))
36157 error ("the argument to xabort must be an 8-bit immediate");
36158 return const0_rtx;
36160 emit_insn (gen_xabort (op0));
36161 return 0;
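/* Illustrative sketch (not from the original sources): the _xabort
   intrinsic wrapping __builtin_ia32_xabort only accepts a compile-time
   8-bit constant, so _xabort (0xff) expands to the xabort insn above,
   while passing a runtime value such as _xabort (status) takes the
   error path instead.  */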
36163 default:
36164 break;
36167 for (i = 0, d = bdesc_special_args;
36168 i < ARRAY_SIZE (bdesc_special_args);
36169 i++, d++)
36170 if (d->code == fcode)
36171 return ix86_expand_special_args_builtin (d, exp, target);
36173 for (i = 0, d = bdesc_args;
36174 i < ARRAY_SIZE (bdesc_args);
36175 i++, d++)
36176 if (d->code == fcode)
36177 switch (fcode)
36179 case IX86_BUILTIN_FABSQ:
36180 case IX86_BUILTIN_COPYSIGNQ:
36181 if (!TARGET_SSE)
36182 /* Emit a normal call if SSE isn't available. */
36183 return expand_call (exp, target, ignore);
36184 default:
36185 return ix86_expand_args_builtin (d, exp, target);
36188 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
36189 if (d->code == fcode)
36190 return ix86_expand_sse_comi (d, exp, target);
36192 for (i = 0, d = bdesc_round_args; i < ARRAY_SIZE (bdesc_round_args); i++, d++)
36193 if (d->code == fcode)
36194 return ix86_expand_round_builtin (d, exp, target);
36196 for (i = 0, d = bdesc_pcmpestr;
36197 i < ARRAY_SIZE (bdesc_pcmpestr);
36198 i++, d++)
36199 if (d->code == fcode)
36200 return ix86_expand_sse_pcmpestr (d, exp, target);
36202 for (i = 0, d = bdesc_pcmpistr;
36203 i < ARRAY_SIZE (bdesc_pcmpistr);
36204 i++, d++)
36205 if (d->code == fcode)
36206 return ix86_expand_sse_pcmpistr (d, exp, target);
36208 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
36209 if (d->code == fcode)
36210 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
36211 (enum ix86_builtin_func_type)
36212 d->flag, d->comparison);
36214 gcc_unreachable ();
36217 /* This returns the target-specific builtin with code CODE if
36218 current_function_decl has visibility on this builtin, which is checked
36219 using isa flags. Returns NULL_TREE otherwise. */
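/* Illustrative sketch (not from the original sources): in a function
   compiled for AVX2 but not AVX-512F, ix86_get_builtin
   (IX86_BUILTIN_GATHERSIV8SF) returns the builtin decl, while
   ix86_get_builtin (IX86_BUILTIN_GATHER3SIV16SF) returns NULL_TREE
   because the AVX-512F bit is missing from x_ix86_isa_flags.  */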
36221 static tree ix86_get_builtin (enum ix86_builtins code)
36223 struct cl_target_option *opts;
36224 tree target_tree = NULL_TREE;
36226 /* Determine the isa flags of current_function_decl. */
36228 if (current_function_decl)
36229 target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
36231 if (target_tree == NULL)
36232 target_tree = target_option_default_node;
36234 opts = TREE_TARGET_OPTION (target_tree);
36236 if (ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
36237 return ix86_builtin_decl (code, true);
36238 else
36239 return NULL_TREE;
36242 /* Returns a function decl for a vectorized version of the builtin function
36243 with declaration FNDECL, result vector type TYPE_OUT and argument vector
36244 type TYPE_IN, or NULL_TREE if it is not available. */
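/* Illustrative sketch (not from the original sources): when the
   vectorizer processes a sqrt () loop over doubles with 4-element
   V4DF vectors (e.g. with AVX enabled), this hook is called with
   fn == BUILT_IN_SQRT, out_mode == in_mode == DFmode and
   out_n == in_n == 4, and it returns the decl from
   ix86_get_builtin (IX86_BUILTIN_SQRTPD256).  */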
36246 static tree
36247 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
36248 tree type_in)
36250 enum machine_mode in_mode, out_mode;
36251 int in_n, out_n;
36252 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
36254 if (TREE_CODE (type_out) != VECTOR_TYPE
36255 || TREE_CODE (type_in) != VECTOR_TYPE
36256 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
36257 return NULL_TREE;
36259 out_mode = TYPE_MODE (TREE_TYPE (type_out));
36260 out_n = TYPE_VECTOR_SUBPARTS (type_out);
36261 in_mode = TYPE_MODE (TREE_TYPE (type_in));
36262 in_n = TYPE_VECTOR_SUBPARTS (type_in);
36264 switch (fn)
36266 case BUILT_IN_SQRT:
36267 if (out_mode == DFmode && in_mode == DFmode)
36269 if (out_n == 2 && in_n == 2)
36270 return ix86_get_builtin (IX86_BUILTIN_SQRTPD);
36271 else if (out_n == 4 && in_n == 4)
36272 return ix86_get_builtin (IX86_BUILTIN_SQRTPD256);
36273 else if (out_n == 8 && in_n == 8)
36274 return ix86_get_builtin (IX86_BUILTIN_SQRTPD512);
36276 break;
36278 case BUILT_IN_EXP2F:
36279 if (out_mode == SFmode && in_mode == SFmode)
36281 if (out_n == 16 && in_n == 16)
36282 return ix86_get_builtin (IX86_BUILTIN_EXP2PS);
36284 break;
36286 case BUILT_IN_SQRTF:
36287 if (out_mode == SFmode && in_mode == SFmode)
36289 if (out_n == 4 && in_n == 4)
36290 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR);
36291 else if (out_n == 8 && in_n == 8)
36292 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR256);
36293 else if (out_n == 16 && in_n == 16)
36294 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR512);
36296 break;
36298 case BUILT_IN_IFLOOR:
36299 case BUILT_IN_LFLOOR:
36300 case BUILT_IN_LLFLOOR:
36301 /* The round insn does not trap on denormals. */
36302 if (flag_trapping_math || !TARGET_ROUND)
36303 break;
36305 if (out_mode == SImode && in_mode == DFmode)
36307 if (out_n == 4 && in_n == 2)
36308 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX);
36309 else if (out_n == 8 && in_n == 4)
36310 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256);
36311 else if (out_n == 16 && in_n == 8)
36312 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512);
36314 break;
36316 case BUILT_IN_IFLOORF:
36317 case BUILT_IN_LFLOORF:
36318 case BUILT_IN_LLFLOORF:
36319 /* The round insn does not trap on denormals. */
36320 if (flag_trapping_math || !TARGET_ROUND)
36321 break;
36323 if (out_mode == SImode && in_mode == SFmode)
36325 if (out_n == 4 && in_n == 4)
36326 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
36327 else if (out_n == 8 && in_n == 8)
36328 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
36330 break;
36332 case BUILT_IN_ICEIL:
36333 case BUILT_IN_LCEIL:
36334 case BUILT_IN_LLCEIL:
36335 /* The round insn does not trap on denormals. */
36336 if (flag_trapping_math || !TARGET_ROUND)
36337 break;
36339 if (out_mode == SImode && in_mode == DFmode)
36341 if (out_n == 4 && in_n == 2)
36342 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX);
36343 else if (out_n == 8 && in_n == 4)
36344 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256);
36345 else if (out_n == 16 && in_n == 8)
36346 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512);
36348 break;
36350 case BUILT_IN_ICEILF:
36351 case BUILT_IN_LCEILF:
36352 case BUILT_IN_LLCEILF:
36353 /* The round insn does not trap on denormals. */
36354 if (flag_trapping_math || !TARGET_ROUND)
36355 break;
36357 if (out_mode == SImode && in_mode == SFmode)
36359 if (out_n == 4 && in_n == 4)
36360 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX);
36361 else if (out_n == 8 && in_n == 8)
36362 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
36364 break;
36366 case BUILT_IN_IRINT:
36367 case BUILT_IN_LRINT:
36368 case BUILT_IN_LLRINT:
36369 if (out_mode == SImode && in_mode == DFmode)
36371 if (out_n == 4 && in_n == 2)
36372 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
36373 else if (out_n == 8 && in_n == 4)
36374 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
36376 break;
36378 case BUILT_IN_IRINTF:
36379 case BUILT_IN_LRINTF:
36380 case BUILT_IN_LLRINTF:
36381 if (out_mode == SImode && in_mode == SFmode)
36383 if (out_n == 4 && in_n == 4)
36384 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ);
36385 else if (out_n == 8 && in_n == 8)
36386 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256);
36388 break;
36390 case BUILT_IN_IROUND:
36391 case BUILT_IN_LROUND:
36392 case BUILT_IN_LLROUND:
36393 /* The round insn does not trap on denormals. */
36394 if (flag_trapping_math || !TARGET_ROUND)
36395 break;
36397 if (out_mode == SImode && in_mode == DFmode)
36399 if (out_n == 4 && in_n == 2)
36400 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX);
36401 else if (out_n == 8 && in_n == 4)
36402 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256);
36403 else if (out_n == 16 && in_n == 8)
36404 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512);
36406 break;
36408 case BUILT_IN_IROUNDF:
36409 case BUILT_IN_LROUNDF:
36410 case BUILT_IN_LLROUNDF:
36411 /* The round insn does not trap on denormals. */
36412 if (flag_trapping_math || !TARGET_ROUND)
36413 break;
36415 if (out_mode == SImode && in_mode == SFmode)
36417 if (out_n == 4 && in_n == 4)
36418 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
36419 else if (out_n == 8 && in_n == 8)
36420 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
36422 break;
36424 case BUILT_IN_COPYSIGN:
36425 if (out_mode == DFmode && in_mode == DFmode)
36427 if (out_n == 2 && in_n == 2)
36428 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD);
36429 else if (out_n == 4 && in_n == 4)
36430 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD256);
36431 else if (out_n == 8 && in_n == 8)
36432 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD512);
36434 break;
36436 case BUILT_IN_COPYSIGNF:
36437 if (out_mode == SFmode && in_mode == SFmode)
36439 if (out_n == 4 && in_n == 4)
36440 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS);
36441 else if (out_n == 8 && in_n == 8)
36442 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS256);
36443 else if (out_n == 16 && in_n == 16)
36444 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS512);
36446 break;
36448 case BUILT_IN_FLOOR:
36449 /* The round insn does not trap on denormals. */
36450 if (flag_trapping_math || !TARGET_ROUND)
36451 break;
36453 if (out_mode == DFmode && in_mode == DFmode)
36455 if (out_n == 2 && in_n == 2)
36456 return ix86_get_builtin (IX86_BUILTIN_FLOORPD);
36457 else if (out_n == 4 && in_n == 4)
36458 return ix86_get_builtin (IX86_BUILTIN_FLOORPD256);
36460 break;
36462 case BUILT_IN_FLOORF:
36463 /* The round insn does not trap on denormals. */
36464 if (flag_trapping_math || !TARGET_ROUND)
36465 break;
36467 if (out_mode == SFmode && in_mode == SFmode)
36469 if (out_n == 4 && in_n == 4)
36470 return ix86_get_builtin (IX86_BUILTIN_FLOORPS);
36471 else if (out_n == 8 && in_n == 8)
36472 return ix86_get_builtin (IX86_BUILTIN_FLOORPS256);
36474 break;
36476 case BUILT_IN_CEIL:
36477 /* The round insn does not trap on denormals. */
36478 if (flag_trapping_math || !TARGET_ROUND)
36479 break;
36481 if (out_mode == DFmode && in_mode == DFmode)
36483 if (out_n == 2 && in_n == 2)
36484 return ix86_get_builtin (IX86_BUILTIN_CEILPD);
36485 else if (out_n == 4 && in_n == 4)
36486 return ix86_get_builtin (IX86_BUILTIN_CEILPD256);
36488 break;
36490 case BUILT_IN_CEILF:
36491 /* The round insn does not trap on denormals. */
36492 if (flag_trapping_math || !TARGET_ROUND)
36493 break;
36495 if (out_mode == SFmode && in_mode == SFmode)
36497 if (out_n == 4 && in_n == 4)
36498 return ix86_get_builtin (IX86_BUILTIN_CEILPS);
36499 else if (out_n == 8 && in_n == 8)
36500 return ix86_get_builtin (IX86_BUILTIN_CEILPS256);
36502 break;
36504 case BUILT_IN_TRUNC:
36505 /* The round insn does not trap on denormals. */
36506 if (flag_trapping_math || !TARGET_ROUND)
36507 break;
36509 if (out_mode == DFmode && in_mode == DFmode)
36511 if (out_n == 2 && in_n == 2)
36512 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD);
36513 else if (out_n == 4 && in_n == 4)
36514 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256);
36516 break;
36518 case BUILT_IN_TRUNCF:
36519 /* The round insn does not trap on denormals. */
36520 if (flag_trapping_math || !TARGET_ROUND)
36521 break;
36523 if (out_mode == SFmode && in_mode == SFmode)
36525 if (out_n == 4 && in_n == 4)
36526 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS);
36527 else if (out_n == 8 && in_n == 8)
36528 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256);
36530 break;
36532 case BUILT_IN_RINT:
36533 /* The round insn does not trap on denormals. */
36534 if (flag_trapping_math || !TARGET_ROUND)
36535 break;
36537 if (out_mode == DFmode && in_mode == DFmode)
36539 if (out_n == 2 && in_n == 2)
36540 return ix86_get_builtin (IX86_BUILTIN_RINTPD);
36541 else if (out_n == 4 && in_n == 4)
36542 return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
36544 break;
36546 case BUILT_IN_RINTF:
36547 /* The round insn does not trap on denormals. */
36548 if (flag_trapping_math || !TARGET_ROUND)
36549 break;
36551 if (out_mode == SFmode && in_mode == SFmode)
36553 if (out_n == 4 && in_n == 4)
36554 return ix86_get_builtin (IX86_BUILTIN_RINTPS);
36555 else if (out_n == 8 && in_n == 8)
36556 return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
36558 break;
36560 case BUILT_IN_ROUND:
36561 /* The round insn does not trap on denormals. */
36562 if (flag_trapping_math || !TARGET_ROUND)
36563 break;
36565 if (out_mode == DFmode && in_mode == DFmode)
36567 if (out_n == 2 && in_n == 2)
36568 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ);
36569 else if (out_n == 4 && in_n == 4)
36570 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ256);
36572 break;
36574 case BUILT_IN_ROUNDF:
36575 /* The round insn does not trap on denormals. */
36576 if (flag_trapping_math || !TARGET_ROUND)
36577 break;
36579 if (out_mode == SFmode && in_mode == SFmode)
36581 if (out_n == 4 && in_n == 4)
36582 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ);
36583 else if (out_n == 8 && in_n == 8)
36584 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ256);
36586 break;
36588 case BUILT_IN_FMA:
36589 if (out_mode == DFmode && in_mode == DFmode)
36591 if (out_n == 2 && in_n == 2)
36592 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD);
36593 if (out_n == 4 && in_n == 4)
36594 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256);
36596 break;
36598 case BUILT_IN_FMAF:
36599 if (out_mode == SFmode && in_mode == SFmode)
36601 if (out_n == 4 && in_n == 4)
36602 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS);
36603 if (out_n == 8 && in_n == 8)
36604 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256);
36606 break;
36608 default:
36609 break;
36612 /* Dispatch to a handler for a vectorization library. */
36613 if (ix86_veclib_handler)
36614 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
36615 type_in);
36617 return NULL_TREE;
36620 /* Handler for an SVML-style interface to
36621 a library with vectorized intrinsics. */
36623 static tree
36624 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
36626 char name[20];
36627 tree fntype, new_fndecl, args;
36628 unsigned arity;
36629 const char *bname;
36630 enum machine_mode el_mode, in_mode;
36631 int n, in_n;
36633 /* SVML is suitable for unsafe math only. */
36634 if (!flag_unsafe_math_optimizations)
36635 return NULL_TREE;
36637 el_mode = TYPE_MODE (TREE_TYPE (type_out));
36638 n = TYPE_VECTOR_SUBPARTS (type_out);
36639 in_mode = TYPE_MODE (TREE_TYPE (type_in));
36640 in_n = TYPE_VECTOR_SUBPARTS (type_in);
36641 if (el_mode != in_mode
36642 || n != in_n)
36643 return NULL_TREE;
36645 switch (fn)
36647 case BUILT_IN_EXP:
36648 case BUILT_IN_LOG:
36649 case BUILT_IN_LOG10:
36650 case BUILT_IN_POW:
36651 case BUILT_IN_TANH:
36652 case BUILT_IN_TAN:
36653 case BUILT_IN_ATAN:
36654 case BUILT_IN_ATAN2:
36655 case BUILT_IN_ATANH:
36656 case BUILT_IN_CBRT:
36657 case BUILT_IN_SINH:
36658 case BUILT_IN_SIN:
36659 case BUILT_IN_ASINH:
36660 case BUILT_IN_ASIN:
36661 case BUILT_IN_COSH:
36662 case BUILT_IN_COS:
36663 case BUILT_IN_ACOSH:
36664 case BUILT_IN_ACOS:
36665 if (el_mode != DFmode || n != 2)
36666 return NULL_TREE;
36667 break;
36669 case BUILT_IN_EXPF:
36670 case BUILT_IN_LOGF:
36671 case BUILT_IN_LOG10F:
36672 case BUILT_IN_POWF:
36673 case BUILT_IN_TANHF:
36674 case BUILT_IN_TANF:
36675 case BUILT_IN_ATANF:
36676 case BUILT_IN_ATAN2F:
36677 case BUILT_IN_ATANHF:
36678 case BUILT_IN_CBRTF:
36679 case BUILT_IN_SINHF:
36680 case BUILT_IN_SINF:
36681 case BUILT_IN_ASINHF:
36682 case BUILT_IN_ASINF:
36683 case BUILT_IN_COSHF:
36684 case BUILT_IN_COSF:
36685 case BUILT_IN_ACOSHF:
36686 case BUILT_IN_ACOSF:
36687 if (el_mode != SFmode || n != 4)
36688 return NULL_TREE;
36689 break;
36691 default:
36692 return NULL_TREE;
36695 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
36697 if (fn == BUILT_IN_LOGF)
36698 strcpy (name, "vmlsLn4");
36699 else if (fn == BUILT_IN_LOG)
36700 strcpy (name, "vmldLn2");
36701 else if (n == 4)
36703 sprintf (name, "vmls%s", bname+10);
36704 name[strlen (name)-1] = '4';
36706 else
36707 sprintf (name, "vmld%s2", bname+10);
36709 /* Convert to uppercase. */
36710 name[4] &= ~0x20;
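/* Illustrative sketch (not from the original sources) of the mangling
   above: BUILT_IN_SINF has bname "__builtin_sinf", so bname+10 is
   "sinf"; the sprintf yields "vmlssinf", the trailing character is
   replaced by '4' giving "vmlssin4", and uppercasing name[4] produces
   "vmlsSin4".  BUILT_IN_POW similarly becomes "vmldPow2".  */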
36712 arity = 0;
36713 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
36714 args;
36715 args = TREE_CHAIN (args))
36716 arity++;
36718 if (arity == 1)
36719 fntype = build_function_type_list (type_out, type_in, NULL);
36720 else
36721 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
36723 /* Build a function declaration for the vectorized function. */
36724 new_fndecl = build_decl (BUILTINS_LOCATION,
36725 FUNCTION_DECL, get_identifier (name), fntype);
36726 TREE_PUBLIC (new_fndecl) = 1;
36727 DECL_EXTERNAL (new_fndecl) = 1;
36728 DECL_IS_NOVOPS (new_fndecl) = 1;
36729 TREE_READONLY (new_fndecl) = 1;
36731 return new_fndecl;
36734 /* Handler for an ACML-style interface to
36735 a library with vectorized intrinsics. */
36737 static tree
36738 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
36740 char name[20] = "__vr.._";
36741 tree fntype, new_fndecl, args;
36742 unsigned arity;
36743 const char *bname;
36744 enum machine_mode el_mode, in_mode;
36745 int n, in_n;
36747 /* ACML is 64-bit only and suitable only for unsafe math, as it does
36748 not correctly support parts of IEEE semantics, such as denormals,
36749 with the required precision. */
36750 if (!TARGET_64BIT
36751 || !flag_unsafe_math_optimizations)
36752 return NULL_TREE;
36754 el_mode = TYPE_MODE (TREE_TYPE (type_out));
36755 n = TYPE_VECTOR_SUBPARTS (type_out);
36756 in_mode = TYPE_MODE (TREE_TYPE (type_in));
36757 in_n = TYPE_VECTOR_SUBPARTS (type_in);
36758 if (el_mode != in_mode
36759 || n != in_n)
36760 return NULL_TREE;
36762 switch (fn)
36764 case BUILT_IN_SIN:
36765 case BUILT_IN_COS:
36766 case BUILT_IN_EXP:
36767 case BUILT_IN_LOG:
36768 case BUILT_IN_LOG2:
36769 case BUILT_IN_LOG10:
36770 name[4] = 'd';
36771 name[5] = '2';
36772 if (el_mode != DFmode
36773 || n != 2)
36774 return NULL_TREE;
36775 break;
36777 case BUILT_IN_SINF:
36778 case BUILT_IN_COSF:
36779 case BUILT_IN_EXPF:
36780 case BUILT_IN_POWF:
36781 case BUILT_IN_LOGF:
36782 case BUILT_IN_LOG2F:
36783 case BUILT_IN_LOG10F:
36784 name[4] = 's';
36785 name[5] = '4';
36786 if (el_mode != SFmode
36787 || n != 4)
36788 return NULL_TREE;
36789 break;
36791 default:
36792 return NULL_TREE;
36795 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
36796 sprintf (name + 7, "%s", bname+10);
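/* Illustrative sketch (not from the original sources): for BUILT_IN_SIN
   the template "__vr.._" becomes "__vrd2_" and bname+10 is "sin", so
   the resulting name is "__vrd2_sin"; BUILT_IN_SINF maps to
   "__vrs4_sinf".  */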
36798 arity = 0;
36799 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
36800 args;
36801 args = TREE_CHAIN (args))
36802 arity++;
36804 if (arity == 1)
36805 fntype = build_function_type_list (type_out, type_in, NULL);
36806 else
36807 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
36809 /* Build a function declaration for the vectorized function. */
36810 new_fndecl = build_decl (BUILTINS_LOCATION,
36811 FUNCTION_DECL, get_identifier (name), fntype);
36812 TREE_PUBLIC (new_fndecl) = 1;
36813 DECL_EXTERNAL (new_fndecl) = 1;
36814 DECL_IS_NOVOPS (new_fndecl) = 1;
36815 TREE_READONLY (new_fndecl) = 1;
36817 return new_fndecl;
36820 /* Returns a decl of a function that implements a gather load with
36821 memory type MEM_VECTYPE, index type INDEX_TYPE and scale SCALE.
36822 Return NULL_TREE if it is not available. */
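/* Illustrative sketch (not from the original sources): with -O3 -mavx2
   a loop such as

     void f (double *a, const double *b, const int *idx, int n)
     {
       int i;
       for (i = 0; i < n; i++)
         a[i] = b[idx[i]];
     }

   can be vectorized with V4DF loads indexed by SImode values; the hook
   below is then called with mem_vectype V4DF, an SImode index type and
   scale 8, and selects IX86_BUILTIN_GATHERALTSIV4DF.  */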
36824 static tree
36825 ix86_vectorize_builtin_gather (const_tree mem_vectype,
36826 const_tree index_type, int scale)
36828 bool si;
36829 enum ix86_builtins code;
36831 if (! TARGET_AVX2)
36832 return NULL_TREE;
36834 if ((TREE_CODE (index_type) != INTEGER_TYPE
36835 && !POINTER_TYPE_P (index_type))
36836 || (TYPE_MODE (index_type) != SImode
36837 && TYPE_MODE (index_type) != DImode))
36838 return NULL_TREE;
36840 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
36841 return NULL_TREE;
36843 /* v*gather* insn sign extends index to pointer mode. */
36844 if (TYPE_PRECISION (index_type) < POINTER_SIZE
36845 && TYPE_UNSIGNED (index_type))
36846 return NULL_TREE;
36848 if (scale <= 0
36849 || scale > 8
36850 || (scale & (scale - 1)) != 0)
36851 return NULL_TREE;
36853 si = TYPE_MODE (index_type) == SImode;
36854 switch (TYPE_MODE (mem_vectype))
36856 case V2DFmode:
36857 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
36858 break;
36859 case V4DFmode:
36860 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
36861 break;
36862 case V2DImode:
36863 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
36864 break;
36865 case V4DImode:
36866 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
36867 break;
36868 case V4SFmode:
36869 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
36870 break;
36871 case V8SFmode:
36872 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
36873 break;
36874 case V4SImode:
36875 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
36876 break;
36877 case V8SImode:
36878 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
36879 break;
36880 case V8DFmode:
36881 if (TARGET_AVX512F)
36882 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF;
36883 else
36884 return NULL_TREE;
36885 break;
36886 case V8DImode:
36887 if (TARGET_AVX512F)
36888 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI;
36889 else
36890 return NULL_TREE;
36891 break;
36892 case V16SFmode:
36893 if (TARGET_AVX512F)
36894 code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF;
36895 else
36896 return NULL_TREE;
36897 break;
36898 case V16SImode:
36899 if (TARGET_AVX512F)
36900 code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI;
36901 else
36902 return NULL_TREE;
36903 break;
36904 default:
36905 return NULL_TREE;
36908 return ix86_get_builtin (code);
36911 /* Returns a decl for a target-specific builtin that implements the
36912 reciprocal of function FN, or NULL_TREE if not available. */
36914 static tree
36915 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
36916 bool sqrt ATTRIBUTE_UNUSED)
36918 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
36919 && flag_finite_math_only && !flag_trapping_math
36920 && flag_unsafe_math_optimizations))
36921 return NULL_TREE;
36923 if (md_fn)
36924 /* Machine dependent builtins. */
36925 switch (fn)
36927 /* Vectorized version of sqrt to rsqrt conversion. */
36928 case IX86_BUILTIN_SQRTPS_NR:
36929 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
36931 case IX86_BUILTIN_SQRTPS_NR256:
36932 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
36934 default:
36935 return NULL_TREE;
36937 else
36938 /* Normal builtins. */
36939 switch (fn)
36941 /* Sqrt to rsqrt conversion. */
36942 case BUILT_IN_SQRTF:
36943 return ix86_get_builtin (IX86_BUILTIN_RSQRTF);
36945 default:
36946 return NULL_TREE;
36950 /* Helper for avx_vpermilps256_operand et al. This is also used by
36951 the expansion functions to turn the parallel back into a mask.
36952 The return value is 0 for no match and the imm8+1 for a match. */
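/* Illustrative sketch (not from the original sources): for V4SFmode a
   (parallel [0 3 2 1]) stores each selector in a 2-bit field, so the
   loop below computes mask = 0 | (3 << 2) | (2 << 4) | (1 << 6) = 0x6c
   and the function returns 0x6d, i.e. the vpermilps imm8 plus one.  */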
36955 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
36957 unsigned i, nelt = GET_MODE_NUNITS (mode);
36958 unsigned mask = 0;
36959 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
36961 if (XVECLEN (par, 0) != (int) nelt)
36962 return 0;
36964 /* Validate that all of the elements are constants, and not totally
36965 out of range. Copy the data into an integral array to make the
36966 subsequent checks easier. */
36967 for (i = 0; i < nelt; ++i)
36969 rtx er = XVECEXP (par, 0, i);
36970 unsigned HOST_WIDE_INT ei;
36972 if (!CONST_INT_P (er))
36973 return 0;
36974 ei = INTVAL (er);
36975 if (ei >= nelt)
36976 return 0;
36977 ipar[i] = ei;
36980 switch (mode)
36982 case V8DFmode:
36983 /* In the 512-bit DFmode case, we can only move elements within
36984 a 128-bit lane. First fill the second part of the mask,
36985 then fallthru. */
36986 for (i = 4; i < 6; ++i)
36988 if (ipar[i] < 4 || ipar[i] >= 6)
36989 return 0;
36990 mask |= (ipar[i] - 4) << i;
36992 for (i = 6; i < 8; ++i)
36994 if (ipar[i] < 6)
36995 return 0;
36996 mask |= (ipar[i] - 6) << i;
36998 /* FALLTHRU */
37000 case V4DFmode:
37001 /* In the 256-bit DFmode case, we can only move elements within
37002 a 128-bit lane. */
37003 for (i = 0; i < 2; ++i)
37005 if (ipar[i] >= 2)
37006 return 0;
37007 mask |= ipar[i] << i;
37009 for (i = 2; i < 4; ++i)
37011 if (ipar[i] < 2)
37012 return 0;
37013 mask |= (ipar[i] - 2) << i;
37015 break;
37017 case V16SFmode:
37018 /* In the 512-bit SFmode case, the permutation in the upper 256 bits
37019 must mirror the permutation in the lower 256 bits. */
37020 for (i = 0; i < 8; ++i)
37021 if (ipar[i] + 8 != ipar[i + 8])
37022 return 0;
37023 /* FALLTHRU */
37025 case V8SFmode:
37026 /* In the 256-bit SFmode case, we have full freedom of
37027 movement within the low 128-bit lane, but the high 128-bit
37028 lane must mirror the exact same pattern. */
37029 for (i = 0; i < 4; ++i)
37030 if (ipar[i] + 4 != ipar[i + 4])
37031 return 0;
37032 nelt = 4;
37033 /* FALLTHRU */
37035 case V2DFmode:
37036 case V4SFmode:
37037 /* In the 128-bit case, we have full freedom in the placement of
37038 the elements from the source operand. */
37039 for (i = 0; i < nelt; ++i)
37040 mask |= ipar[i] << (i * (nelt / 2));
37041 break;
37043 default:
37044 gcc_unreachable ();
37047 /* Make sure success has a non-zero value by adding one. */
37048 return mask + 1;
37051 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
37052 the expansion functions to turn the parallel back into a mask.
37053 The return value is 0 for no match and the imm8+1 for a match. */
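/* Illustrative sketch (not from the original sources): for V4DFmode a
   (parallel [2 3 0 1]) swaps the two 128-bit lanes of the first source;
   the two halves select lanes e = 1 and e = 0, so the reconstruction
   below yields mask = (1 << 0) | (0 << 4) = 0x01 and the function
   returns 0x02, i.e. the vperm2f128 imm8 plus one.  */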
37056 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
37058 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
37059 unsigned mask = 0;
37060 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
37062 if (XVECLEN (par, 0) != (int) nelt)
37063 return 0;
37065 /* Validate that all of the elements are constants, and not totally
37066 out of range. Copy the data into an integral array to make the
37067 subsequent checks easier. */
37068 for (i = 0; i < nelt; ++i)
37070 rtx er = XVECEXP (par, 0, i);
37071 unsigned HOST_WIDE_INT ei;
37073 if (!CONST_INT_P (er))
37074 return 0;
37075 ei = INTVAL (er);
37076 if (ei >= 2 * nelt)
37077 return 0;
37078 ipar[i] = ei;
37081 /* Validate that each half of the permute selects a contiguous block of elements. */
37082 for (i = 0; i < nelt2 - 1; ++i)
37083 if (ipar[i] + 1 != ipar[i + 1])
37084 return 0;
37085 for (i = nelt2; i < nelt - 1; ++i)
37086 if (ipar[i] + 1 != ipar[i + 1])
37087 return 0;
37089 /* Reconstruct the mask. */
37090 for (i = 0; i < 2; ++i)
37092 unsigned e = ipar[i * nelt2];
37093 if (e % nelt2)
37094 return 0;
37095 e /= nelt2;
37096 mask |= e << (i * 4);
37099 /* Make sure success has a non-zero value by adding one. */
37100 return mask + 1;
37103 /* Return a register priority for hard reg REGNO. */
37104 static int
37105 ix86_register_priority (int hard_regno)
37107 /* ebp and r13 as the base always want a displacement, and r12 as the
37108 base always wants an index. So discourage their use in an
37109 address. */
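/* Illustrative sketch (not from the original sources): in ModRM/SIB
   encoding a base of ebp/r13 cannot be expressed without a
   displacement (the encoder must emit a disp8 of zero), and a base of
   esp/r12 always requires a SIB byte, so addresses built on these
   registers cost an extra byte.  */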
37110 if (hard_regno == R12_REG || hard_regno == R13_REG)
37111 return 0;
37112 if (hard_regno == BP_REG)
37113 return 1;
37114 /* New x86-64 int registers result in bigger code size. Discourage
37115 them. */
37116 if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
37117 return 2;
37118 /* New x86-64 SSE registers result in bigger code size. Discourage
37119 them. */
37120 if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
37121 return 2;
37122 /* Usage of AX register results in smaller code. Prefer it. */
37123 if (hard_regno == 0)
37124 return 4;
37125 return 3;
37128 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
37130 Put float CONST_DOUBLE in the constant pool instead of fp regs.
37131 QImode must go into class Q_REGS.
37132 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
37133 movdf to do mem-to-mem moves through integer regs. */
37135 static reg_class_t
37136 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
37138 enum machine_mode mode = GET_MODE (x);
37140 /* We're only allowed to return a subclass of CLASS. Many of the
37141 following checks fail for NO_REGS, so eliminate that early. */
37142 if (regclass == NO_REGS)
37143 return NO_REGS;
37145 /* All classes can load zeros. */
37146 if (x == CONST0_RTX (mode))
37147 return regclass;
37149 /* Force constants into memory if we are loading a (nonzero) constant into
37150 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
37151 instructions to load from a constant. */
37152 if (CONSTANT_P (x)
37153 && (MAYBE_MMX_CLASS_P (regclass)
37154 || MAYBE_SSE_CLASS_P (regclass)
37155 || MAYBE_MASK_CLASS_P (regclass)))
37156 return NO_REGS;
37158 /* Prefer SSE regs only if we can use them for math. */
37159 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
37160 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
37162 /* Floating-point constants need more complex checks. */
37163 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
37165 /* General regs can load everything. */
37166 if (reg_class_subset_p (regclass, GENERAL_REGS))
37167 return regclass;
37169 /* Floats can load 0 and 1 plus some others. Note that we eliminated
37170 zero above. We only want to wind up preferring 80387 registers if
37171 we plan on doing computation with them. */
37172 if (TARGET_80387
37173 && standard_80387_constant_p (x) > 0)
37175 /* Limit class to non-sse. */
37176 if (regclass == FLOAT_SSE_REGS)
37177 return FLOAT_REGS;
37178 if (regclass == FP_TOP_SSE_REGS)
37179 return FP_TOP_REG;
37180 if (regclass == FP_SECOND_SSE_REGS)
37181 return FP_SECOND_REG;
37182 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
37183 return regclass;
37186 return NO_REGS;
37189 /* Generally when we see PLUS here, it's the function invariant
37190 (plus soft-fp const_int), which can only be computed into general
37191 regs. */
37192 if (GET_CODE (x) == PLUS)
37193 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
37195 /* QImode constants are easy to load, but non-constant QImode data
37196 must go into Q_REGS. */
37197 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
37199 if (reg_class_subset_p (regclass, Q_REGS))
37200 return regclass;
37201 if (reg_class_subset_p (Q_REGS, regclass))
37202 return Q_REGS;
37203 return NO_REGS;
37206 return regclass;
37209 /* Discourage putting floating-point values in SSE registers unless
37210 SSE math is being used, and likewise for the 387 registers. */
37211 static reg_class_t
37212 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
37214 enum machine_mode mode = GET_MODE (x);
37216 /* Restrict the output reload class to the register bank that we are doing
37217 math on. If we would like not to return a subset of CLASS, reject this
37218 alternative: if reload cannot do this, it will still use its choice. */
37219 mode = GET_MODE (x);
37220 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
37221 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
37223 if (X87_FLOAT_MODE_P (mode))
37225 if (regclass == FP_TOP_SSE_REGS)
37226 return FP_TOP_REG;
37227 else if (regclass == FP_SECOND_SSE_REGS)
37228 return FP_SECOND_REG;
37229 else
37230 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
37233 return regclass;
37236 static reg_class_t
37237 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
37238 enum machine_mode mode, secondary_reload_info *sri)
37240 /* Double-word spills from general registers to non-offsettable memory
37241 references (zero-extended addresses) require special handling. */
37242 if (TARGET_64BIT
37243 && MEM_P (x)
37244 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
37245 && INTEGER_CLASS_P (rclass)
37246 && !offsettable_memref_p (x))
37248 sri->icode = (in_p
37249 ? CODE_FOR_reload_noff_load
37250 : CODE_FOR_reload_noff_store);
37251 /* Add the cost of moving address to a temporary. */
37252 sri->extra_cost = 1;
37254 return NO_REGS;
37257 /* QImode spills from non-QI registers require
37258 an intermediate register on 32-bit targets. */
37259 if (mode == QImode
37260 && (MAYBE_MASK_CLASS_P (rclass)
37261 || (!TARGET_64BIT && !in_p
37262 && INTEGER_CLASS_P (rclass)
37263 && MAYBE_NON_Q_CLASS_P (rclass))))
37265 int regno;
37267 if (REG_P (x))
37268 regno = REGNO (x);
37269 else
37270 regno = -1;
37272 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
37273 regno = true_regnum (x);
37275 /* Return Q_REGS if the operand is in memory. */
37276 if (regno == -1)
37277 return Q_REGS;
37280 /* This condition handles the corner case where an expression involving
37281 pointers gets vectorized. We're trying to use the address of a
37282 stack slot as a vector initializer.
37284 (set (reg:V2DI 74 [ vect_cst_.2 ])
37285 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
37287 Eventually frame gets turned into sp+offset like this:
37289 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37290 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
37291 (const_int 392 [0x188]))))
37293 That later gets turned into:
37295 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37296 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
37297 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
37299 We'll have the following reload recorded:
37301 Reload 0: reload_in (DI) =
37302 (plus:DI (reg/f:DI 7 sp)
37303 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
37304 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37305 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
37306 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
37307 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37308 reload_reg_rtx: (reg:V2DI 22 xmm1)
37310 Which isn't going to work since SSE instructions can't handle scalar
37311 additions. Returning GENERAL_REGS forces the addition into integer
37312 register and reload can handle subsequent reloads without problems. */
37314 if (in_p && GET_CODE (x) == PLUS
37315 && SSE_CLASS_P (rclass)
37316 && SCALAR_INT_MODE_P (mode))
37317 return GENERAL_REGS;
37319 return NO_REGS;
37322 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
37324 static bool
37325 ix86_class_likely_spilled_p (reg_class_t rclass)
37327 switch (rclass)
37329 case AREG:
37330 case DREG:
37331 case CREG:
37332 case BREG:
37333 case AD_REGS:
37334 case SIREG:
37335 case DIREG:
37336 case SSE_FIRST_REG:
37337 case FP_TOP_REG:
37338 case FP_SECOND_REG:
37339 return true;
37341 default:
37342 break;
37345 return false;
37348 /* If we are copying between general and FP registers, we need a memory
37349 location. The same is true for SSE and MMX registers.
37351 To optimize register_move_cost performance, allow inline variant.
37353 The macro can't work reliably when one of the CLASSES is a class containing
37354 registers from multiple units (SSE, MMX, integer). We avoid this by never
37355 combining those units in a single alternative in the machine description.
37356 Ensure that this constraint holds to avoid unexpected surprises.
37358 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
37359 enforce these sanity checks. */
37361 static inline bool
37362 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
37363 enum machine_mode mode, int strict)
37365 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
37366 return false;
37367 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
37368 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
37369 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
37370 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
37371 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
37372 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
37374 gcc_assert (!strict || lra_in_progress);
37375 return true;
37378 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
37379 return true;
37381 /* ??? This is a lie. We do have moves between mmx/general, and for
37382 mmx/sse2. But by saying we need secondary memory we discourage the
37383 register allocator from using the mmx registers unless needed. */
37384 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
37385 return true;
37387 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
37389 /* SSE1 doesn't have any direct moves from other classes. */
37390 if (!TARGET_SSE2)
37391 return true;
37393 /* If the target says that inter-unit moves are more expensive
37394 than moving through memory, then don't generate them. */
37395 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
37396 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
37397 return true;
37399 /* Between SSE and general, we have moves no larger than word size. */
37400 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
37401 return true;
37404 return false;
37407 bool
37408 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
37409 enum machine_mode mode, int strict)
37411 return inline_secondary_memory_needed (class1, class2, mode, strict);
37414 /* Implement the TARGET_CLASS_MAX_NREGS hook.
37416 On the 80386, this is the size of MODE in words,
37417 except in the FP regs, where a single reg is always enough. */
37419 static unsigned char
37420 ix86_class_max_nregs (reg_class_t rclass, enum machine_mode mode)
37422 if (MAYBE_INTEGER_CLASS_P (rclass))
37424 if (mode == XFmode)
37425 return (TARGET_64BIT ? 2 : 3);
37426 else if (mode == XCmode)
37427 return (TARGET_64BIT ? 4 : 6);
37428 else
37429 return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
37431 else
37433 if (COMPLEX_MODE_P (mode))
37434 return 2;
37435 else
37436 return 1;
37440 /* Return true if the registers in CLASS cannot represent the change from
37441 modes FROM to TO. */
37443 bool
37444 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
37445 enum reg_class regclass)
37447 if (from == to)
37448 return false;
37450 /* x87 registers can't do subreg at all, as all values are reformatted
37451 to extended precision. */
37452 if (MAYBE_FLOAT_CLASS_P (regclass))
37453 return true;
37455 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
37457 /* Vector registers do not support QI or HImode loads. If we don't
37458 disallow a change to these modes, reload will assume it's ok to
37459 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
37460 the vec_dupv4hi pattern. */
37461 if (GET_MODE_SIZE (from) < 4)
37462 return true;
37464 /* Vector registers do not support subreg with nonzero offsets, which
37465 are otherwise valid for integer registers. Since we can't see
37466 whether we have a nonzero offset from here, prohibit all
37467 nonparadoxical subregs changing size. */
37468 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
37469 return true;
37472 return false;
37475 /* Return the cost of moving data of mode M between a
37476 register and memory. A value of 2 is the default; this cost is
37477 relative to those in `REGISTER_MOVE_COST'.
37479 This function is used extensively by register_move_cost that is used to
37480 build tables at startup. Make it inline in this case.
37481 When IN is 2, return maximum of in and out move cost.
37483 If moving between registers and memory is more expensive than
37484 between two registers, you should define this macro to express the
37485 relative cost.
37487 Also model the increased cost of moving QImode registers in non
37488 Q_REGS classes. */
37490 static inline int
37491 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
37492 int in)
37494 int cost;
37495 if (FLOAT_CLASS_P (regclass))
37497 int index;
37498 switch (mode)
37500 case SFmode:
37501 index = 0;
37502 break;
37503 case DFmode:
37504 index = 1;
37505 break;
37506 case XFmode:
37507 index = 2;
37508 break;
37509 default:
37510 return 100;
37512 if (in == 2)
37513 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
37514 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
37516 if (SSE_CLASS_P (regclass))
37518 int index;
37519 switch (GET_MODE_SIZE (mode))
37521 case 4:
37522 index = 0;
37523 break;
37524 case 8:
37525 index = 1;
37526 break;
37527 case 16:
37528 index = 2;
37529 break;
37530 default:
37531 return 100;
37533 if (in == 2)
37534 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
37535 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
37537 if (MMX_CLASS_P (regclass))
37539 int index;
37540 switch (GET_MODE_SIZE (mode))
37542 case 4:
37543 index = 0;
37544 break;
37545 case 8:
37546 index = 1;
37547 break;
37548 default:
37549 return 100;
37551 if (in == 2)
37552 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
37553 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
37555 switch (GET_MODE_SIZE (mode))
37557 case 1:
37558 if (Q_CLASS_P (regclass) || TARGET_64BIT)
37560 if (!in)
37561 return ix86_cost->int_store[0];
37562 if (TARGET_PARTIAL_REG_DEPENDENCY
37563 && optimize_function_for_speed_p (cfun))
37564 cost = ix86_cost->movzbl_load;
37565 else
37566 cost = ix86_cost->int_load[0];
37567 if (in == 2)
37568 return MAX (cost, ix86_cost->int_store[0]);
37569 return cost;
37571 else
37573 if (in == 2)
37574 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
37575 if (in)
37576 return ix86_cost->movzbl_load;
37577 else
37578 return ix86_cost->int_store[0] + 4;
37580 break;
37581 case 2:
37582 if (in == 2)
37583 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
37584 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
37585 default:
37586 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
37587 if (mode == TFmode)
37588 mode = XFmode;
37589 if (in == 2)
37590 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
37591 else if (in)
37592 cost = ix86_cost->int_load[2];
37593 else
37594 cost = ix86_cost->int_store[2];
37595 return (cost * (((int) GET_MODE_SIZE (mode)
37596 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
37600 static int
37601 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
37602 bool in)
37604 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
37608 /* Return the cost of moving data from a register in class CLASS1 to
37609 one in class CLASS2.
37611 It is not required that the cost always equal 2 when FROM is the same as TO;
37612 on some machines it is expensive to move between registers if they are not
37613 general registers. */
37615 static int
37616 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
37617 reg_class_t class2_i)
37619 enum reg_class class1 = (enum reg_class) class1_i;
37620 enum reg_class class2 = (enum reg_class) class2_i;
37622 /* In case we require secondary memory, compute the cost of the store
37623 followed by the load. To avoid bad register allocation choices, we need
37624 this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
37626 if (inline_secondary_memory_needed (class1, class2, mode, 0))
37628 int cost = 1;
37630 cost += inline_memory_move_cost (mode, class1, 2);
37631 cost += inline_memory_move_cost (mode, class2, 2);
37633 /* In the case of copying from a general purpose register we may emit
37634 multiple stores followed by a single load, causing a memory size
37635 mismatch stall. Count this as an arbitrarily high cost of 20. */
37636 if (targetm.class_max_nregs (class1, mode)
37637 > targetm.class_max_nregs (class2, mode))
37638 cost += 20;
37640 /* In the case of FP/MMX moves, the registers actually overlap, and we
37641 have to switch modes in order to treat them differently. */
37642 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
37643 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
37644 cost += 20;
37646 return cost;
37649 /* Moves between SSE/MMX and integer unit are expensive. */
37650 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
37651 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
37653 /* ??? By keeping the returned value relatively high, we limit the number
37654 of moves between integer and MMX/SSE registers for all targets.
37655 Additionally, a high value prevents a problem with x86_modes_tieable_p(),
37656 where integer modes in MMX/SSE registers are not tieable
37657 because of missing QImode and HImode moves to, from or between
37658 MMX/SSE registers. */
37659 return MAX (8, ix86_cost->mmxsse_to_integer);
37661 if (MAYBE_FLOAT_CLASS_P (class1))
37662 return ix86_cost->fp_move;
37663 if (MAYBE_SSE_CLASS_P (class1))
37664 return ix86_cost->sse_move;
37665 if (MAYBE_MMX_CLASS_P (class1))
37666 return ix86_cost->mmx_move;
37667 return 2;
37670 /* Return TRUE if hard register REGNO can hold a value of machine-mode
37671 MODE. */
37673 bool
37674 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
37676 /* Flags and only flags can only hold CCmode values. */
37677 if (CC_REGNO_P (regno))
37678 return GET_MODE_CLASS (mode) == MODE_CC;
37679 if (GET_MODE_CLASS (mode) == MODE_CC
37680 || GET_MODE_CLASS (mode) == MODE_RANDOM
37681 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
37682 return false;
37683 if (STACK_REGNO_P (regno))
37684 return VALID_FP_MODE_P (mode);
37685 if (MASK_REGNO_P (regno))
37686 return VALID_MASK_REG_MODE (mode);
37687 if (SSE_REGNO_P (regno))
37689 /* We implement the move patterns for all vector modes into and
37690 out of SSE registers, even when no operation instructions
37691 are available. */
37693 /* For AVX-512 we allow, regardless of regno:
37694 - XI mode
37695 - any 512-bit wide vector mode
37696 - any scalar mode. */
37697 if (TARGET_AVX512F
37698 && (mode == XImode
37699 || VALID_AVX512F_REG_MODE (mode)
37700 || VALID_AVX512F_SCALAR_MODE (mode)))
37701 return true;
37703 /* xmm16-xmm31 are only available for AVX-512. */
37704 if (EXT_REX_SSE_REGNO_P (regno))
37705 return false;
37707 /* OImode and AVX modes are available only when AVX is enabled. */
37708 return ((TARGET_AVX
37709 && VALID_AVX256_REG_OR_OI_MODE (mode))
37710 || VALID_SSE_REG_MODE (mode)
37711 || VALID_SSE2_REG_MODE (mode)
37712 || VALID_MMX_REG_MODE (mode)
37713 || VALID_MMX_REG_MODE_3DNOW (mode));
37715 if (MMX_REGNO_P (regno))
37717 /* We implement the move patterns for 3DNOW modes even in MMX mode,
37718 so if the register is available at all, then we can move data of
37719 the given mode into or out of it. */
37720 return (VALID_MMX_REG_MODE (mode)
37721 || VALID_MMX_REG_MODE_3DNOW (mode));
37724 if (mode == QImode)
37726 /* Take care for QImode values - they can be in non-QI regs,
37727 but then they do cause partial register stalls. */
37728 if (ANY_QI_REGNO_P (regno))
37729 return true;
37730 if (!TARGET_PARTIAL_REG_STALL)
37731 return true;
37732 /* LRA checks if the hard register is OK for the given mode.
37733 QImode values can live in non-QI regs, so we allow all
37734 registers here. */
37735 if (lra_in_progress)
37736 return true;
37737 return !can_create_pseudo_p ();
37739 /* We handle both integers and floats in the general purpose registers. */
37740 else if (VALID_INT_MODE_P (mode))
37741 return true;
37742 else if (VALID_FP_MODE_P (mode))
37743 return true;
37744 else if (VALID_DFP_MODE_P (mode))
37745 return true;
37746 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
37747 on to use that value in smaller contexts, this can easily force a
37748 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
37749 supporting DImode, allow it. */
37750 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
37751 return true;
37753 return false;
37756 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
37757 tieable integer mode. */
37759 static bool
37760 ix86_tieable_integer_mode_p (enum machine_mode mode)
37762 switch (mode)
37764 case HImode:
37765 case SImode:
37766 return true;
37768 case QImode:
37769 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
37771 case DImode:
37772 return TARGET_64BIT;
37774 default:
37775 return false;
37779 /* Return true if MODE1 is accessible in a register that can hold MODE2
37780 without copying. That is, all register classes that can hold MODE2
37781 can also hold MODE1. */
37783 bool
37784 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
37786 if (mode1 == mode2)
37787 return true;
37789 if (ix86_tieable_integer_mode_p (mode1)
37790 && ix86_tieable_integer_mode_p (mode2))
37791 return true;
37793 /* MODE2 being XFmode implies fp stack or general regs, which means we
37794 can tie any smaller floating point modes to it. Note that we do not
37795 tie this with TFmode. */
37796 if (mode2 == XFmode)
37797 return mode1 == SFmode || mode1 == DFmode;
37799 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
37800 that we can tie it with SFmode. */
37801 if (mode2 == DFmode)
37802 return mode1 == SFmode;
37804 /* If MODE2 is only appropriate for an SSE register, then tie with
37805 any other mode acceptable to SSE registers. */
37806 if (GET_MODE_SIZE (mode2) == 32
37807 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
37808 return (GET_MODE_SIZE (mode1) == 32
37809 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
37810 if (GET_MODE_SIZE (mode2) == 16
37811 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
37812 return (GET_MODE_SIZE (mode1) == 16
37813 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
37815 /* If MODE2 is appropriate for an MMX register, then tie
37816 with any other mode acceptable to MMX registers. */
37817 if (GET_MODE_SIZE (mode2) == 8
37818 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
37819 return (GET_MODE_SIZE (mode1) == 8
37820 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
37822 return false;
37825 /* Return the cost of moving between two registers of mode MODE. */
37827 static int
37828 ix86_set_reg_reg_cost (enum machine_mode mode)
37830 unsigned int units = UNITS_PER_WORD;
37832 switch (GET_MODE_CLASS (mode))
37834 default:
37835 break;
37837 case MODE_CC:
37838 units = GET_MODE_SIZE (CCmode);
37839 break;
37841 case MODE_FLOAT:
37842 if ((TARGET_SSE && mode == TFmode)
37843 || (TARGET_80387 && mode == XFmode)
37844 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
37845 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
37846 units = GET_MODE_SIZE (mode);
37847 break;
37849 case MODE_COMPLEX_FLOAT:
37850 if ((TARGET_SSE && mode == TCmode)
37851 || (TARGET_80387 && mode == XCmode)
37852 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
37853 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
37854 units = GET_MODE_SIZE (mode);
37855 break;
37857 case MODE_VECTOR_INT:
37858 case MODE_VECTOR_FLOAT:
37859 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
37860 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
37861 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
37862 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
37863 || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
37864 units = GET_MODE_SIZE (mode);
37867 /* Return the cost of moving between two registers of mode MODE,
37868 assuming that the move will be in pieces of at most UNITS bytes. */
37869 return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
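/* Worked example of the formula above, assuming a 32-bit target
   (UNITS_PER_WORD == 4): a DFmode move with neither the 80387 nor SSE2
   available keeps units == 4, so it costs
   COSTS_N_INSNS ((8 + 4 - 1) / 4) == COSTS_N_INSNS (2), i.e. two
   word-sized pieces; with SSE2 enabled units becomes 8 and the same
   move costs COSTS_N_INSNS (1).  */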
37872 /* Compute a (partial) cost for rtx X. Return true if the complete
37873 cost has been computed, and false if subexpressions should be
37874 scanned. In either case, *TOTAL contains the cost result. */
37876 static bool
37877 ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total,
37878 bool speed)
37880 rtx mask;
37881 enum rtx_code code = (enum rtx_code) code_i;
37882 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
37883 enum machine_mode mode = GET_MODE (x);
37884 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
37886 switch (code)
37888 case SET:
37889 if (register_operand (SET_DEST (x), VOIDmode)
37890 && reg_or_0_operand (SET_SRC (x), VOIDmode))
37892 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
37893 return true;
37895 return false;
37897 case CONST_INT:
37898 case CONST:
37899 case LABEL_REF:
37900 case SYMBOL_REF:
37901 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
37902 *total = 3;
37903 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
37904 *total = 2;
37905 else if (flag_pic && SYMBOLIC_CONST (x)
37906 && !(TARGET_64BIT
37907 && (GET_CODE (x) == LABEL_REF
37908 || (GET_CODE (x) == SYMBOL_REF
37909 && SYMBOL_REF_LOCAL_P (x)))))
37910 *total = 1;
37911 else
37912 *total = 0;
37913 return true;
37915 case CONST_DOUBLE:
37916 if (mode == VOIDmode)
37918 *total = 0;
37919 return true;
37921 switch (standard_80387_constant_p (x))
37923 case 1: /* 0.0 */
37924 *total = 1;
37925 return true;
37926 default: /* Other constants */
37927 *total = 2;
37928 return true;
37929 case 0:
37930 case -1:
37931 break;
37933 if (SSE_FLOAT_MODE_P (mode))
37935 case CONST_VECTOR:
37936 switch (standard_sse_constant_p (x))
37938 case 0:
37939 break;
37940 case 1: /* 0: xor eliminates false dependency */
37941 *total = 0;
37942 return true;
37943 default: /* -1: cmp contains false dependency */
37944 *total = 1;
37945 return true;
37948 /* Fall back to (MEM (SYMBOL_REF)), since that's where
37949 it'll probably end up. Add a penalty for size. */
37950 *total = (COSTS_N_INSNS (1)
37951 + (flag_pic != 0 && !TARGET_64BIT)
37952 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
37953 return true;
37955 case ZERO_EXTEND:
37956 /* The zero extension is often completely free on x86_64, so make
37957 it as cheap as possible. */
37958 if (TARGET_64BIT && mode == DImode
37959 && GET_MODE (XEXP (x, 0)) == SImode)
37960 *total = 1;
37961 else if (TARGET_ZERO_EXTEND_WITH_AND)
37962 *total = cost->add;
37963 else
37964 *total = cost->movzx;
37965 return false;
37967 case SIGN_EXTEND:
37968 *total = cost->movsx;
37969 return false;
37971 case ASHIFT:
37972 if (SCALAR_INT_MODE_P (mode)
37973 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
37974 && CONST_INT_P (XEXP (x, 1)))
37976 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
37977 if (value == 1)
37979 *total = cost->add;
37980 return false;
37982 if ((value == 2 || value == 3)
37983 && cost->lea <= cost->shift_const)
37985 *total = cost->lea;
37986 return false;
37989 /* FALLTHRU */
37991 case ROTATE:
37992 case ASHIFTRT:
37993 case LSHIFTRT:
37994 case ROTATERT:
37995 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
37997 /* ??? Should be SSE vector operation cost. */
37998 /* At least for published AMD latencies, this really is the same
37999 as the latency for a simple fpu operation like fabs. */
38000 /* V*QImode is emulated with 1-11 insns. */
38001 if (mode == V16QImode || mode == V32QImode)
38003 int count = 11;
38004 if (TARGET_XOP && mode == V16QImode)
38006 /* For XOP we use vpshab, which requires a broadcast of the
38007 shift value to the variable shift insn. For constants this
38008 means a V16QI constant in memory; even when we can perform the
38009 shift with one insn, set the cost to prefer paddb. */
38010 if (CONSTANT_P (XEXP (x, 1)))
38012 *total = (cost->fabs
38013 + rtx_cost (XEXP (x, 0), code, 0, speed)
38014 + (speed ? 2 : COSTS_N_BYTES (16)));
38015 return true;
38017 count = 3;
38019 else if (TARGET_SSSE3)
38020 count = 7;
38021 *total = cost->fabs * count;
38023 else
38024 *total = cost->fabs;
38026 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38028 if (CONST_INT_P (XEXP (x, 1)))
38030 if (INTVAL (XEXP (x, 1)) > 32)
38031 *total = cost->shift_const + COSTS_N_INSNS (2);
38032 else
38033 *total = cost->shift_const * 2;
38035 else
38037 if (GET_CODE (XEXP (x, 1)) == AND)
38038 *total = cost->shift_var * 2;
38039 else
38040 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
38043 else
38045 if (CONST_INT_P (XEXP (x, 1)))
38046 *total = cost->shift_const;
38047 else if (GET_CODE (XEXP (x, 1)) == SUBREG
38048 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
38050 /* Return the cost after shift-and truncation. */
38051 *total = cost->shift_var;
38052 return true;
38054 else
38055 *total = cost->shift_var;
38057 return false;
38059 case FMA:
38061 rtx sub;
38063 gcc_assert (FLOAT_MODE_P (mode));
38064 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
38066 /* ??? SSE scalar/vector cost should be used here. */
38067 /* ??? Bald assumption that fma has the same cost as fmul. */
38068 *total = cost->fmul;
38069 *total += rtx_cost (XEXP (x, 1), FMA, 1, speed);
38071 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
38072 sub = XEXP (x, 0);
38073 if (GET_CODE (sub) == NEG)
38074 sub = XEXP (sub, 0);
38075 *total += rtx_cost (sub, FMA, 0, speed);
38077 sub = XEXP (x, 2);
38078 if (GET_CODE (sub) == NEG)
38079 sub = XEXP (sub, 0);
38080 *total += rtx_cost (sub, FMA, 2, speed);
38081 return true;
38084 case MULT:
38085 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38087 /* ??? SSE scalar cost should be used here. */
38088 *total = cost->fmul;
38089 return false;
38091 else if (X87_FLOAT_MODE_P (mode))
38093 *total = cost->fmul;
38094 return false;
38096 else if (FLOAT_MODE_P (mode))
38098 /* ??? SSE vector cost should be used here. */
38099 *total = cost->fmul;
38100 return false;
38102 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
38104 /* V*QImode is emulated with 7-13 insns. */
38105 if (mode == V16QImode || mode == V32QImode)
38107 int extra = 11;
38108 if (TARGET_XOP && mode == V16QImode)
38109 extra = 5;
38110 else if (TARGET_SSSE3)
38111 extra = 6;
38112 *total = cost->fmul * 2 + cost->fabs * extra;
38114 /* V*DImode is emulated with 5-8 insns. */
38115 else if (mode == V2DImode || mode == V4DImode)
38117 if (TARGET_XOP && mode == V2DImode)
38118 *total = cost->fmul * 2 + cost->fabs * 3;
38119 else
38120 *total = cost->fmul * 3 + cost->fabs * 5;
38122 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
38123 insns, including two PMULUDQ. */
38124 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
38125 *total = cost->fmul * 2 + cost->fabs * 5;
38126 else
38127 *total = cost->fmul;
38128 return false;
38130 else
38132 rtx op0 = XEXP (x, 0);
38133 rtx op1 = XEXP (x, 1);
38134 int nbits;
38135 if (CONST_INT_P (XEXP (x, 1)))
38137 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
38138 for (nbits = 0; value != 0; value &= value - 1)
38139 nbits++;
38141 else
38142 /* This is arbitrary. */
38143 nbits = 7;
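/* The loop above is Kernighan's population-count idiom: value &= value - 1
   clears the lowest set bit on each iteration, so nbits ends up as the
   number of 1 bits in the constant multiplier.  For example, a multiplier
   of 10 (binary 1010) yields nbits == 2, and the constant multiply is
   costed below as mult_init + 2 * mult_bit plus the operand costs.  */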
38145 /* Compute costs correctly for widening multiplication. */
38146 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
38147 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
38148 == GET_MODE_SIZE (mode))
38150 int is_mulwiden = 0;
38151 enum machine_mode inner_mode = GET_MODE (op0);
38153 if (GET_CODE (op0) == GET_CODE (op1))
38154 is_mulwiden = 1, op1 = XEXP (op1, 0);
38155 else if (CONST_INT_P (op1))
38157 if (GET_CODE (op0) == SIGN_EXTEND)
38158 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
38159 == INTVAL (op1);
38160 else
38161 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
38164 if (is_mulwiden)
38165 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
38168 *total = (cost->mult_init[MODE_INDEX (mode)]
38169 + nbits * cost->mult_bit
38170 + rtx_cost (op0, outer_code, opno, speed)
38171 + rtx_cost (op1, outer_code, opno, speed));
38173 return true;
38176 case DIV:
38177 case UDIV:
38178 case MOD:
38179 case UMOD:
38180 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38181 /* ??? SSE cost should be used here. */
38182 *total = cost->fdiv;
38183 else if (X87_FLOAT_MODE_P (mode))
38184 *total = cost->fdiv;
38185 else if (FLOAT_MODE_P (mode))
38186 /* ??? SSE vector cost should be used here. */
38187 *total = cost->fdiv;
38188 else
38189 *total = cost->divide[MODE_INDEX (mode)];
38190 return false;
38192 case PLUS:
38193 if (GET_MODE_CLASS (mode) == MODE_INT
38194 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
38196 if (GET_CODE (XEXP (x, 0)) == PLUS
38197 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
38198 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
38199 && CONSTANT_P (XEXP (x, 1)))
38201 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
38202 if (val == 2 || val == 4 || val == 8)
38204 *total = cost->lea;
38205 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
38206 outer_code, opno, speed);
38207 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
38208 outer_code, opno, speed);
38209 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38210 return true;
38213 else if (GET_CODE (XEXP (x, 0)) == MULT
38214 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
38216 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
38217 if (val == 2 || val == 4 || val == 8)
38219 *total = cost->lea;
38220 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
38221 outer_code, opno, speed);
38222 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38223 return true;
38226 else if (GET_CODE (XEXP (x, 0)) == PLUS)
38228 *total = cost->lea;
38229 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
38230 outer_code, opno, speed);
38231 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
38232 outer_code, opno, speed);
38233 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38234 return true;
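/* The three patterns above correspond to addresses that a single lea
   instruction can compute.  For instance, (plus (mult reg 4)
   (const_int 16)) matches the second pattern and is costed as one lea
   plus the cost of its operands, rather than as a shift followed by an
   add.  */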
38237 /* FALLTHRU */
38239 case MINUS:
38240 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38242 /* ??? SSE cost should be used here. */
38243 *total = cost->fadd;
38244 return false;
38246 else if (X87_FLOAT_MODE_P (mode))
38248 *total = cost->fadd;
38249 return false;
38251 else if (FLOAT_MODE_P (mode))
38253 /* ??? SSE vector cost should be used here. */
38254 *total = cost->fadd;
38255 return false;
38257 /* FALLTHRU */
38259 case AND:
38260 case IOR:
38261 case XOR:
38262 if (GET_MODE_CLASS (mode) == MODE_INT
38263 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38265 *total = (cost->add * 2
38266 + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
38267 << (GET_MODE (XEXP (x, 0)) != DImode))
38268 + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
38269 << (GET_MODE (XEXP (x, 1)) != DImode)));
38270 return true;
38272 /* FALLTHRU */
38274 case NEG:
38275 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38277 /* ??? SSE cost should be used here. */
38278 *total = cost->fchs;
38279 return false;
38281 else if (X87_FLOAT_MODE_P (mode))
38283 *total = cost->fchs;
38284 return false;
38286 else if (FLOAT_MODE_P (mode))
38288 /* ??? SSE vector cost should be used here. */
38289 *total = cost->fchs;
38290 return false;
38292 /* FALLTHRU */
38294 case NOT:
38295 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
38297 /* ??? Should be SSE vector operation cost. */
38298 /* At least for published AMD latencies, this really is the same
38299 as the latency for a simple fpu operation like fabs. */
38300 *total = cost->fabs;
38302 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38303 *total = cost->add * 2;
38304 else
38305 *total = cost->add;
38306 return false;
38308 case COMPARE:
38309 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
38310 && XEXP (XEXP (x, 0), 1) == const1_rtx
38311 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
38312 && XEXP (x, 1) == const0_rtx)
38314 /* This kind of construct is implemented using test[bwl].
38315 Treat it as if we had an AND. */
38316 *total = (cost->add
38317 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
38318 + rtx_cost (const1_rtx, outer_code, opno, speed));
38319 return true;
38321 return false;
38323 case FLOAT_EXTEND:
38324 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
38325 *total = 0;
38326 return false;
38328 case ABS:
38329 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38330 /* ??? SSE cost should be used here. */
38331 *total = cost->fabs;
38332 else if (X87_FLOAT_MODE_P (mode))
38333 *total = cost->fabs;
38334 else if (FLOAT_MODE_P (mode))
38335 /* ??? SSE vector cost should be used here. */
38336 *total = cost->fabs;
38337 return false;
38339 case SQRT:
38340 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38341 /* ??? SSE cost should be used here. */
38342 *total = cost->fsqrt;
38343 else if (X87_FLOAT_MODE_P (mode))
38344 *total = cost->fsqrt;
38345 else if (FLOAT_MODE_P (mode))
38346 /* ??? SSE vector cost should be used here. */
38347 *total = cost->fsqrt;
38348 return false;
38350 case UNSPEC:
38351 if (XINT (x, 1) == UNSPEC_TP)
38352 *total = 0;
38353 return false;
38355 case VEC_SELECT:
38356 case VEC_CONCAT:
38357 case VEC_DUPLICATE:
38358 /* ??? Assume all of these vector manipulation patterns are
38359 recognizable, in which case they all have pretty much the
38360 same cost. */
38361 *total = cost->fabs;
38362 return true;
38363 case VEC_MERGE:
38364 mask = XEXP (x, 2);
38365 /* This is a masked instruction; assume the same cost
38366 as the nonmasked variant. */
38367 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
38368 *total = rtx_cost (XEXP (x, 0), outer_code, opno, speed);
38369 else
38370 *total = cost->fabs;
38371 return true;
38373 default:
38374 return false;
38378 #if TARGET_MACHO
38380 static int current_machopic_label_num;
38382 /* Given a symbol name and its associated stub, write out the
38383 definition of the stub. */
38385 void
38386 machopic_output_stub (FILE *file, const char *symb, const char *stub)
38388 unsigned int length;
38389 char *binder_name, *symbol_name, lazy_ptr_name[32];
38390 int label = ++current_machopic_label_num;
38392 /* For 64-bit we shouldn't get here. */
38393 gcc_assert (!TARGET_64BIT);
38395 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
38396 symb = targetm.strip_name_encoding (symb);
38398 length = strlen (stub);
38399 binder_name = XALLOCAVEC (char, length + 32);
38400 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
38402 length = strlen (symb);
38403 symbol_name = XALLOCAVEC (char, length + 32);
38404 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
38406 sprintf (lazy_ptr_name, "L%d$lz", label);
38408 if (MACHOPIC_ATT_STUB)
38409 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
38410 else if (MACHOPIC_PURE)
38411 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
38412 else
38413 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
38415 fprintf (file, "%s:\n", stub);
38416 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
38418 if (MACHOPIC_ATT_STUB)
38420 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
38422 else if (MACHOPIC_PURE)
38424 /* PIC stub. */
38425 /* 25-byte PIC stub using "CALL get_pc_thunk". */
38426 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
38427 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
38428 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
38429 label, lazy_ptr_name, label);
38430 fprintf (file, "\tjmp\t*%%ecx\n");
38432 else
38433 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
38435 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
38436 it needs no stub-binding-helper. */
38437 if (MACHOPIC_ATT_STUB)
38438 return;
38440 fprintf (file, "%s:\n", binder_name);
38442 if (MACHOPIC_PURE)
38444 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
38445 fprintf (file, "\tpushl\t%%ecx\n");
38447 else
38448 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
38450 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
38452 /* N.B. Keep the correspondence of these
38453 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
38454 old-pic/new-pic/non-pic stubs; altering this will break
38455 compatibility with existing dylibs. */
38456 if (MACHOPIC_PURE)
38458 /* 25-byte PIC stub using "CALL get_pc_thunk". */
38459 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
38461 else
38462 /* 16-byte -mdynamic-no-pic stub. */
38463 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
38465 fprintf (file, "%s:\n", lazy_ptr_name);
38466 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
38467 fprintf (file, ASM_LONG "%s\n", binder_name);
38469 #endif /* TARGET_MACHO */
38471 /* Order the registers for register allocator. */
38473 void
38474 x86_order_regs_for_local_alloc (void)
38476 int pos = 0;
38477 int i;
38479 /* First allocate the local general purpose registers. */
38480 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
38481 if (GENERAL_REGNO_P (i) && call_used_regs[i])
38482 reg_alloc_order [pos++] = i;
38484 /* Global general purpose registers. */
38485 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
38486 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
38487 reg_alloc_order [pos++] = i;
38489 /* x87 registers come first in case we are doing FP math
38490 using them. */
38491 if (!TARGET_SSE_MATH)
38492 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
38493 reg_alloc_order [pos++] = i;
38495 /* SSE registers. */
38496 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
38497 reg_alloc_order [pos++] = i;
38498 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
38499 reg_alloc_order [pos++] = i;
38501 /* Extended REX SSE registers. */
38502 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
38503 reg_alloc_order [pos++] = i;
38505 /* Mask register. */
38506 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
38507 reg_alloc_order [pos++] = i;
38509 /* x87 registers. */
38510 if (TARGET_SSE_MATH)
38511 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
38512 reg_alloc_order [pos++] = i;
38514 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
38515 reg_alloc_order [pos++] = i;
38517 /* Initialize the rest of the array, as we do not allocate some registers
38518 at all. */
38519 while (pos < FIRST_PSEUDO_REGISTER)
38520 reg_alloc_order [pos++] = 0;
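/* In other words, the resulting preference order is: call-clobbered
   general registers, call-saved general registers, the x87 stack when
   it is used for FP math, the SSE registers (including the REX and
   extended REX banks), the mask registers, the x87 stack when SSE math
   is in use, and finally the MMX registers.  */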
38523 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
38524 in struct attribute_spec handler. */
38525 static tree
38526 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
38527 tree args,
38528 int flags ATTRIBUTE_UNUSED,
38529 bool *no_add_attrs)
38531 if (TREE_CODE (*node) != FUNCTION_TYPE
38532 && TREE_CODE (*node) != METHOD_TYPE
38533 && TREE_CODE (*node) != FIELD_DECL
38534 && TREE_CODE (*node) != TYPE_DECL)
38536 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38537 name);
38538 *no_add_attrs = true;
38539 return NULL_TREE;
38541 if (TARGET_64BIT)
38543 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
38544 name);
38545 *no_add_attrs = true;
38546 return NULL_TREE;
38548 if (is_attribute_p ("callee_pop_aggregate_return", name))
38550 tree cst;
38552 cst = TREE_VALUE (args);
38553 if (TREE_CODE (cst) != INTEGER_CST)
38555 warning (OPT_Wattributes,
38556 "%qE attribute requires an integer constant argument",
38557 name);
38558 *no_add_attrs = true;
38560 else if (compare_tree_int (cst, 0) != 0
38561 && compare_tree_int (cst, 1) != 0)
38563 warning (OPT_Wattributes,
38564 "argument to %qE attribute is neither zero, nor one",
38565 name);
38566 *no_add_attrs = true;
38569 return NULL_TREE;
38572 return NULL_TREE;
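/* Usage sketch for the attribute validated above (32-bit only, and the
   argument must be the integer constant 0 or 1), e.g.:

     struct big { int a[4]; };
     struct big __attribute__ ((callee_pop_aggregate_return (1)))
       make_big (void);
*/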
38575 /* Handle an "ms_abi" or "sysv_abi" attribute; arguments as in
38576 struct attribute_spec.handler. */
38577 static tree
38578 ix86_handle_abi_attribute (tree *node, tree name,
38579 tree args ATTRIBUTE_UNUSED,
38580 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
38582 if (TREE_CODE (*node) != FUNCTION_TYPE
38583 && TREE_CODE (*node) != METHOD_TYPE
38584 && TREE_CODE (*node) != FIELD_DECL
38585 && TREE_CODE (*node) != TYPE_DECL)
38587 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38588 name);
38589 *no_add_attrs = true;
38590 return NULL_TREE;
38593 /* Can combine regparm with all attributes but fastcall. */
38594 if (is_attribute_p ("ms_abi", name))
38596 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
38598 error ("ms_abi and sysv_abi attributes are not compatible");
38601 return NULL_TREE;
38603 else if (is_attribute_p ("sysv_abi", name))
38605 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
38607 error ("ms_abi and sysv_abi attributes are not compatible");
38610 return NULL_TREE;
38613 return NULL_TREE;
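/* Usage sketch for the handler above: a function type can select the
   other calling convention with, e.g.,

     void __attribute__ ((ms_abi)) win_call (void *p);

   while putting both ms_abi and sysv_abi on the same type is rejected
   with the error above.  */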
38616 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
38617 struct attribute_spec.handler. */
38618 static tree
38619 ix86_handle_struct_attribute (tree *node, tree name,
38620 tree args ATTRIBUTE_UNUSED,
38621 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
38623 tree *type = NULL;
38624 if (DECL_P (*node))
38626 if (TREE_CODE (*node) == TYPE_DECL)
38627 type = &TREE_TYPE (*node);
38629 else
38630 type = node;
38632 if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
38634 warning (OPT_Wattributes, "%qE attribute ignored",
38635 name);
38636 *no_add_attrs = true;
38639 else if ((is_attribute_p ("ms_struct", name)
38640 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
38641 || ((is_attribute_p ("gcc_struct", name)
38642 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
38644 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
38645 name);
38646 *no_add_attrs = true;
38649 return NULL_TREE;
38652 static tree
38653 ix86_handle_fndecl_attribute (tree *node, tree name,
38654 tree args ATTRIBUTE_UNUSED,
38655 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
38657 if (TREE_CODE (*node) != FUNCTION_DECL)
38659 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38660 name);
38661 *no_add_attrs = true;
38663 return NULL_TREE;
38666 static bool
38667 ix86_ms_bitfield_layout_p (const_tree record_type)
38669 return ((TARGET_MS_BITFIELD_LAYOUT
38670 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
38671 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
38674 /* Returns an expression indicating where the this parameter is
38675 located on entry to the FUNCTION. */
38677 static rtx
38678 x86_this_parameter (tree function)
38680 tree type = TREE_TYPE (function);
38681 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
38682 int nregs;
38684 if (TARGET_64BIT)
38686 const int *parm_regs;
38688 if (ix86_function_type_abi (type) == MS_ABI)
38689 parm_regs = x86_64_ms_abi_int_parameter_registers;
38690 else
38691 parm_regs = x86_64_int_parameter_registers;
38692 return gen_rtx_REG (Pmode, parm_regs[aggr]);
38695 nregs = ix86_function_regparm (type, function);
38697 if (nregs > 0 && !stdarg_p (type))
38699 int regno;
38700 unsigned int ccvt = ix86_get_callcvt (type);
38702 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
38703 regno = aggr ? DX_REG : CX_REG;
38704 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
38706 regno = CX_REG;
38707 if (aggr)
38708 return gen_rtx_MEM (SImode,
38709 plus_constant (Pmode, stack_pointer_rtx, 4));
38711 else
38713 regno = AX_REG;
38714 if (aggr)
38716 regno = DX_REG;
38717 if (nregs == 1)
38718 return gen_rtx_MEM (SImode,
38719 plus_constant (Pmode,
38720 stack_pointer_rtx, 4));
38723 return gen_rtx_REG (SImode, regno);
38726 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
38727 aggr ? 8 : 4));
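/* Concrete examples of the mapping above (a sketch): in the 64-bit
   SysV ABI `this' arrives in %rdi, or in %rsi when the function also
   returns an aggregate in memory; in the 64-bit MS ABI the registers
   are %rcx and %rdx respectively.  On 32-bit targets, fastcall places
   `this' in %ecx (%edx with an aggregate return), thiscall uses %ecx,
   and without register parameters it is read from the stack at 4(%esp)
   (8(%esp) when an aggregate return pointer is passed first).  */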
38730 /* Determine whether x86_output_mi_thunk can succeed. */
38732 static bool
38733 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
38734 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
38735 HOST_WIDE_INT vcall_offset, const_tree function)
38737 /* 64-bit can handle anything. */
38738 if (TARGET_64BIT)
38739 return true;
38741 /* For 32-bit, everything's fine if we have one free register. */
38742 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
38743 return true;
38745 /* Need a free register for vcall_offset. */
38746 if (vcall_offset)
38747 return false;
38749 /* Need a free register for GOT references. */
38750 if (flag_pic && !targetm.binds_local_p (function))
38751 return false;
38753 /* Otherwise ok. */
38754 return true;
38757 /* Output the assembler code for a thunk function. THUNK_DECL is the
38758 declaration for the thunk function itself, FUNCTION is the decl for
38759 the target function. DELTA is an immediate constant offset to be
38760 added to THIS. If VCALL_OFFSET is nonzero, the word at
38761 *(*this + vcall_offset) should be added to THIS. */
38763 static void
38764 x86_output_mi_thunk (FILE *file,
38765 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
38766 HOST_WIDE_INT vcall_offset, tree function)
38768 rtx this_param = x86_this_parameter (function);
38769 rtx this_reg, tmp, fnaddr;
38770 unsigned int tmp_regno;
38772 if (TARGET_64BIT)
38773 tmp_regno = R10_REG;
38774 else
38776 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
38777 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
38778 tmp_regno = AX_REG;
38779 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
38780 tmp_regno = DX_REG;
38781 else
38782 tmp_regno = CX_REG;
38785 emit_note (NOTE_INSN_PROLOGUE_END);
38787 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
38788 pull it in now and let DELTA benefit. */
38789 if (REG_P (this_param))
38790 this_reg = this_param;
38791 else if (vcall_offset)
38793 /* Put the this parameter into %eax. */
38794 this_reg = gen_rtx_REG (Pmode, AX_REG);
38795 emit_move_insn (this_reg, this_param);
38797 else
38798 this_reg = NULL_RTX;
38800 /* Adjust the this parameter by a fixed constant. */
38801 if (delta)
38803 rtx delta_rtx = GEN_INT (delta);
38804 rtx delta_dst = this_reg ? this_reg : this_param;
38806 if (TARGET_64BIT)
38808 if (!x86_64_general_operand (delta_rtx, Pmode))
38810 tmp = gen_rtx_REG (Pmode, tmp_regno);
38811 emit_move_insn (tmp, delta_rtx);
38812 delta_rtx = tmp;
38816 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
38819 /* Adjust the this parameter by a value stored in the vtable. */
38820 if (vcall_offset)
38822 rtx vcall_addr, vcall_mem, this_mem;
38824 tmp = gen_rtx_REG (Pmode, tmp_regno);
38826 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
38827 if (Pmode != ptr_mode)
38828 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
38829 emit_move_insn (tmp, this_mem);
38831 /* Adjust the this parameter. */
38832 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
38833 if (TARGET_64BIT
38834 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
38836 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
38837 emit_move_insn (tmp2, GEN_INT (vcall_offset));
38838 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
38841 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
38842 if (Pmode != ptr_mode)
38843 emit_insn (gen_addsi_1_zext (this_reg,
38844 gen_rtx_REG (ptr_mode,
38845 REGNO (this_reg)),
38846 vcall_mem));
38847 else
38848 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
38851 /* If necessary, drop THIS back to its stack slot. */
38852 if (this_reg && this_reg != this_param)
38853 emit_move_insn (this_param, this_reg);
38855 fnaddr = XEXP (DECL_RTL (function), 0);
38856 if (TARGET_64BIT)
38858 if (!flag_pic || targetm.binds_local_p (function)
38859 || TARGET_PECOFF)
38861 else
38863 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
38864 tmp = gen_rtx_CONST (Pmode, tmp);
38865 fnaddr = gen_const_mem (Pmode, tmp);
38868 else
38870 if (!flag_pic || targetm.binds_local_p (function))
38872 #if TARGET_MACHO
38873 else if (TARGET_MACHO)
38875 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
38876 fnaddr = XEXP (fnaddr, 0);
38878 #endif /* TARGET_MACHO */
38879 else
38881 tmp = gen_rtx_REG (Pmode, CX_REG);
38882 output_set_got (tmp, NULL_RTX);
38884 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
38885 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
38886 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
38887 fnaddr = gen_const_mem (Pmode, fnaddr);
38891 /* Our sibling call patterns do not allow memories, because we have no
38892 predicate that can distinguish between frame and non-frame memory.
38893 For our purposes here, we can get away with (ab)using a jump pattern,
38894 because we're going to do no optimization. */
38895 if (MEM_P (fnaddr))
38896 emit_jump_insn (gen_indirect_jump (fnaddr));
38897 else
38899 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
38900 fnaddr = legitimize_pic_address (fnaddr,
38901 gen_rtx_REG (Pmode, tmp_regno));
38903 if (!sibcall_insn_operand (fnaddr, word_mode))
38905 tmp = gen_rtx_REG (word_mode, tmp_regno);
38906 if (GET_MODE (fnaddr) != word_mode)
38907 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
38908 emit_move_insn (tmp, fnaddr);
38909 fnaddr = tmp;
38912 tmp = gen_rtx_MEM (QImode, fnaddr);
38913 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
38914 tmp = emit_call_insn (tmp);
38915 SIBLING_CALL_P (tmp) = 1;
38917 emit_barrier ();
38919 /* Emit just enough of rest_of_compilation to get the insns emitted.
38920 Note that use_thunk calls assemble_start_function et al. */
38921 tmp = get_insns ();
38922 shorten_branches (tmp);
38923 final_start_function (tmp, file, 1);
38924 final (tmp, file, 1);
38925 final_end_function ();
38928 static void
38929 x86_file_start (void)
38931 default_file_start ();
38932 if (TARGET_16BIT)
38933 fputs ("\t.code16gcc\n", asm_out_file);
38934 #if TARGET_MACHO
38935 darwin_file_start ();
38936 #endif
38937 if (X86_FILE_START_VERSION_DIRECTIVE)
38938 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
38939 if (X86_FILE_START_FLTUSED)
38940 fputs ("\t.global\t__fltused\n", asm_out_file);
38941 if (ix86_asm_dialect == ASM_INTEL)
38942 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
38945 int
38946 x86_field_alignment (tree field, int computed)
38948 enum machine_mode mode;
38949 tree type = TREE_TYPE (field);
38951 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
38952 return computed;
38953 mode = TYPE_MODE (strip_array_types (type));
38954 if (mode == DFmode || mode == DCmode
38955 || GET_MODE_CLASS (mode) == MODE_INT
38956 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
38957 return MIN (32, computed);
38958 return computed;
38961 /* Output assembler code to FILE to increment profiler label # LABELNO
38962 for profiling a function entry. */
38963 void
38964 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
38966 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
38967 : MCOUNT_NAME);
38969 if (TARGET_64BIT)
38971 #ifndef NO_PROFILE_COUNTERS
38972 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
38973 #endif
38975 if (!TARGET_PECOFF && flag_pic)
38976 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
38977 else
38978 fprintf (file, "\tcall\t%s\n", mcount_name);
38980 else if (flag_pic)
38982 #ifndef NO_PROFILE_COUNTERS
38983 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
38984 LPREFIX, labelno);
38985 #endif
38986 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
38988 else
38990 #ifndef NO_PROFILE_COUNTERS
38991 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
38992 LPREFIX, labelno);
38993 #endif
38994 fprintf (file, "\tcall\t%s\n", mcount_name);
38998 /* We don't have exact information about the insn sizes, but we may quite
38999 safely assume that we are informed about all 1-byte insns and memory
39000 address sizes. This is enough to eliminate unnecessary padding in
39001 99% of cases. */
39003 static int
39004 min_insn_size (rtx insn)
39006 int l = 0, len;
39008 if (!INSN_P (insn) || !active_insn_p (insn))
39009 return 0;
39011 /* Discard alignments we've emitted and jump instructions. */
39012 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
39013 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
39014 return 0;
39016 /* Important case - calls are always 5 bytes.
39017 It is common to have many calls in a row. */
39018 if (CALL_P (insn)
39019 && symbolic_reference_mentioned_p (PATTERN (insn))
39020 && !SIBLING_CALL_P (insn))
39021 return 5;
39022 len = get_attr_length (insn);
39023 if (len <= 1)
39024 return 1;
39026 /* For normal instructions we rely on get_attr_length being exact,
39027 with a few exceptions. */
39028 if (!JUMP_P (insn))
39030 enum attr_type type = get_attr_type (insn);
39032 switch (type)
39034 case TYPE_MULTI:
39035 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
39036 || asm_noperands (PATTERN (insn)) >= 0)
39037 return 0;
39038 break;
39039 case TYPE_OTHER:
39040 case TYPE_FCMP:
39041 break;
39042 default:
39043 /* Otherwise trust get_attr_length. */
39044 return len;
39047 l = get_attr_length_address (insn);
39048 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
39049 l = 4;
39051 if (l)
39052 return 1+l;
39053 else
39054 return 2;
39057 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
39059 /* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a
39060 16-byte window. */
39062 static void
39063 ix86_avoid_jump_mispredicts (void)
39065 rtx insn, start = get_insns ();
39066 int nbytes = 0, njumps = 0;
39067 int isjump = 0;
39069 /* Look for all minimal intervals of instructions containing 4 jumps.
39070 The intervals are bounded by START and INSN. NBYTES is the total
39071 size of instructions in the interval including INSN and not including
39072 START. When NBYTES is smaller than 16 bytes, it is possible
39073 that the end of START and INSN end up in the same 16-byte page.
39075 The smallest offset in the page at which INSN can start is the case where
39076 START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
39077 We add a p2align to the 16-byte window with max skip 15 - NBYTES + sizeof (INSN).
39079 Don't consider an asm goto as a jump; while it can contain a jump, it doesn't
39080 have to, since control transfer to its label(s) can be performed through other
39081 means, and we also estimate the minimum length of all asm stmts as 0. */
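/* Plugging numbers into the formula above: with NBYTES == 12 and a
   2-byte INSN, INSN can start no earlier than offset 10 of the page,
   and the p2align emitted below uses a max skip of
   15 - 12 + 2 == 5 bytes.  */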
39082 for (insn = start; insn; insn = NEXT_INSN (insn))
39084 int min_size;
39086 if (LABEL_P (insn))
39088 int align = label_to_alignment (insn);
39089 int max_skip = label_to_max_skip (insn);
39091 if (max_skip > 15)
39092 max_skip = 15;
39093 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
39094 already in the current 16-byte page, because otherwise
39095 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
39096 bytes to reach a 16-byte boundary. */
39097 if (align <= 0
39098 || (align <= 3 && max_skip != (1 << align) - 1))
39099 max_skip = 0;
39100 if (dump_file)
39101 fprintf (dump_file, "Label %i with max_skip %i\n",
39102 INSN_UID (insn), max_skip);
39103 if (max_skip)
39105 while (nbytes + max_skip >= 16)
39107 start = NEXT_INSN (start);
39108 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
39109 || CALL_P (start))
39110 njumps--, isjump = 1;
39111 else
39112 isjump = 0;
39113 nbytes -= min_insn_size (start);
39116 continue;
39119 min_size = min_insn_size (insn);
39120 nbytes += min_size;
39121 if (dump_file)
39122 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
39123 INSN_UID (insn), min_size);
39124 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
39125 || CALL_P (insn))
39126 njumps++;
39127 else
39128 continue;
39130 while (njumps > 3)
39132 start = NEXT_INSN (start);
39133 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
39134 || CALL_P (start))
39135 njumps--, isjump = 1;
39136 else
39137 isjump = 0;
39138 nbytes -= min_insn_size (start);
39140 gcc_assert (njumps >= 0);
39141 if (dump_file)
39142 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
39143 INSN_UID (start), INSN_UID (insn), nbytes);
39145 if (njumps == 3 && isjump && nbytes < 16)
39147 int padsize = 15 - nbytes + min_insn_size (insn);
39149 if (dump_file)
39150 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
39151 INSN_UID (insn), padsize);
39152 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
39156 #endif
39158 /* AMD Athlon works faster
39159 when RET is not the destination of a conditional jump or directly preceded
39160 by another jump instruction. We avoid the penalty by inserting a NOP just
39161 before RET instructions in such cases. */
39162 static void
39163 ix86_pad_returns (void)
39165 edge e;
39166 edge_iterator ei;
39168 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39170 basic_block bb = e->src;
39171 rtx ret = BB_END (bb);
39172 rtx prev;
39173 bool replace = false;
39175 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
39176 || optimize_bb_for_size_p (bb))
39177 continue;
39178 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
39179 if (active_insn_p (prev) || LABEL_P (prev))
39180 break;
39181 if (prev && LABEL_P (prev))
39183 edge e;
39184 edge_iterator ei;
39186 FOR_EACH_EDGE (e, ei, bb->preds)
39187 if (EDGE_FREQUENCY (e) && e->src->index >= 0
39188 && !(e->flags & EDGE_FALLTHRU))
39190 replace = true;
39191 break;
39194 if (!replace)
39196 prev = prev_active_insn (ret);
39197 if (prev
39198 && ((JUMP_P (prev) && any_condjump_p (prev))
39199 || CALL_P (prev)))
39200 replace = true;
39201 /* Empty functions get branch mispredict even when
39202 the jump destination is not visible to us. */
39203 if (!prev && !optimize_function_for_size_p (cfun))
39204 replace = true;
39206 if (replace)
39208 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
39209 delete_insn (ret);
39214 /* Count the minimum number of instructions in BB. Return 4 if the
39215 number of instructions >= 4. */
39217 static int
39218 ix86_count_insn_bb (basic_block bb)
39220 rtx insn;
39221 int insn_count = 0;
39223 /* Count number of instructions in this block. Return 4 if the number
39224 of instructions >= 4. */
39225 FOR_BB_INSNS (bb, insn)
39227 /* Only happen in exit blocks. */
39228 if (JUMP_P (insn)
39229 && ANY_RETURN_P (PATTERN (insn)))
39230 break;
39232 if (NONDEBUG_INSN_P (insn)
39233 && GET_CODE (PATTERN (insn)) != USE
39234 && GET_CODE (PATTERN (insn)) != CLOBBER)
39236 insn_count++;
39237 if (insn_count >= 4)
39238 return insn_count;
39242 return insn_count;
39246 /* Count the minimum number of instructions in code path in BB.
39247 Return 4 if the number of instructions >= 4. */
39249 static int
39250 ix86_count_insn (basic_block bb)
39252 edge e;
39253 edge_iterator ei;
39254 int min_prev_count;
39256 /* Only bother counting instructions along paths with no
39257 more than 2 basic blocks between entry and exit. Given
39258 that BB has an edge to exit, determine if a predecessor
39259 of BB has an edge from entry. If so, compute the number
39260 of instructions in the predecessor block. If there
39261 happen to be multiple such blocks, compute the minimum. */
39262 min_prev_count = 4;
39263 FOR_EACH_EDGE (e, ei, bb->preds)
39265 edge prev_e;
39266 edge_iterator prev_ei;
39268 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
39270 min_prev_count = 0;
39271 break;
39273 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
39275 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
39277 int count = ix86_count_insn_bb (e->src);
39278 if (count < min_prev_count)
39279 min_prev_count = count;
39280 break;
39285 if (min_prev_count < 4)
39286 min_prev_count += ix86_count_insn_bb (bb);
39288 return min_prev_count;
39291 /* Pad short function to 4 instructions. */
39293 static void
39294 ix86_pad_short_function (void)
39296 edge e;
39297 edge_iterator ei;
39299 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39301 rtx ret = BB_END (e->src);
39302 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
39304 int insn_count = ix86_count_insn (e->src);
39306 /* Pad short function. */
39307 if (insn_count < 4)
39309 rtx insn = ret;
39311 /* Find epilogue. */
39312 while (insn
39313 && (!NOTE_P (insn)
39314 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
39315 insn = PREV_INSN (insn);
39317 if (!insn)
39318 insn = ret;
39320 /* Two NOPs count as one instruction. */
39321 insn_count = 2 * (4 - insn_count);
39322 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
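/* For example, a function with a single counted instruction gets
   insn_count == 2 * (4 - 1) == 6 NOPs emitted before its epilogue;
   since two NOPs count as one instruction, that is three instructions'
   worth of padding, bringing the path up to the four-instruction
   minimum.  */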
39328 /* Fix up a Windows system unwinder issue. If an EH region falls through into
39329 the epilogue, the Windows system unwinder will apply epilogue logic and
39330 produce incorrect offsets. This can be avoided by adding a nop between
39331 the last insn that can throw and the first insn of the epilogue. */
39333 static void
39334 ix86_seh_fixup_eh_fallthru (void)
39336 edge e;
39337 edge_iterator ei;
39339 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39341 rtx insn, next;
39343 /* Find the beginning of the epilogue. */
39344 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
39345 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
39346 break;
39347 if (insn == NULL)
39348 continue;
39350 /* We only care about preceding insns that can throw. */
39351 insn = prev_active_insn (insn);
39352 if (insn == NULL || !can_throw_internal (insn))
39353 continue;
39355 /* Do not separate calls from their debug information. */
39356 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
39357 if (NOTE_P (next)
39358 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
39359 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION))
39360 insn = next;
39361 else
39362 break;
39364 emit_insn_after (gen_nops (const1_rtx), insn);
39368 /* Implement machine specific optimizations. We implement padding of returns
39369 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
39370 static void
39371 ix86_reorg (void)
39373 /* We are freeing block_for_insn in the toplev to keep compatibility
39374 with old MDEP_REORGS that are not CFG based. Recompute it now. */
39375 compute_bb_for_insn ();
39377 if (TARGET_SEH && current_function_has_exception_handlers ())
39378 ix86_seh_fixup_eh_fallthru ();
39380 if (optimize && optimize_function_for_speed_p (cfun))
39382 if (TARGET_PAD_SHORT_FUNCTION)
39383 ix86_pad_short_function ();
39384 else if (TARGET_PAD_RETURNS)
39385 ix86_pad_returns ();
39386 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
39387 if (TARGET_FOUR_JUMP_LIMIT)
39388 ix86_avoid_jump_mispredicts ();
39389 #endif
39393 /* Return nonzero when a QImode register that must be represented via a REX
39394 prefix is used. */
39395 bool
39396 x86_extended_QIreg_mentioned_p (rtx insn)
39398 int i;
39399 extract_insn_cached (insn);
39400 for (i = 0; i < recog_data.n_operands; i++)
39401 if (GENERAL_REG_P (recog_data.operand[i])
39402 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
39403 return true;
39404 return false;
39407 /* Return nonzero when P points to a register encoded via a REX prefix.
39408 Called via for_each_rtx. */
39409 static int
39410 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
39412 unsigned int regno;
39413 if (!REG_P (*p))
39414 return 0;
39415 regno = REGNO (*p);
39416 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
39419 /* Return true when INSN mentions register that must be encoded using REX
39420 prefix. */
39421 bool
39422 x86_extended_reg_mentioned_p (rtx insn)
39424 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
39425 extended_reg_mentioned_1, NULL);
39428 /* If profitable, negate (without causing overflow) integer constant
39429 of mode MODE at location LOC. Return true in this case. */
39430 bool
39431 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
39433 HOST_WIDE_INT val;
39435 if (!CONST_INT_P (*loc))
39436 return false;
39438 switch (mode)
39440 case DImode:
39441 /* DImode x86_64 constants must fit in 32 bits. */
39442 gcc_assert (x86_64_immediate_operand (*loc, mode));
39444 mode = SImode;
39445 break;
39447 case SImode:
39448 case HImode:
39449 case QImode:
39450 break;
39452 default:
39453 gcc_unreachable ();
39456 /* Avoid overflows. */
39457 if (mode_signbit_p (mode, *loc))
39458 return false;
39460 val = INTVAL (*loc);
39462 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
39463 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
39464 if ((val < 0 && val != -128)
39465 || val == 128)
39467 *loc = GEN_INT (-val);
39468 return true;
39471 return false;
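/* Examples of the rule above: the constant in `addl $-4, %eax' is
   rewritten to 4 so the caller can emit `subl $4, %eax' instead; -128
   is left alone because it fits in a sign-extended 8-bit immediate
   while +128 would not, and +128 is negated to -128 for the same
   reason.  */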
39474 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
39475 optabs would emit if we didn't have TFmode patterns. */
39477 void
39478 x86_emit_floatuns (rtx operands[2])
39480 rtx neglab, donelab, i0, i1, f0, in, out;
39481 enum machine_mode mode, inmode;
39483 inmode = GET_MODE (operands[1]);
39484 gcc_assert (inmode == SImode || inmode == DImode);
39486 out = operands[0];
39487 in = force_reg (inmode, operands[1]);
39488 mode = GET_MODE (out);
39489 neglab = gen_label_rtx ();
39490 donelab = gen_label_rtx ();
39491 f0 = gen_reg_rtx (mode);
39493 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
39495 expand_float (out, in, 0);
39497 emit_jump_insn (gen_jump (donelab));
39498 emit_barrier ();
39500 emit_label (neglab);
39502 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
39503 1, OPTAB_DIRECT);
39504 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
39505 1, OPTAB_DIRECT);
39506 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
39508 expand_float (f0, i0, 0);
39510 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
39512 emit_label (donelab);
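/* The negative branch above is the usual unsigned-to-float trick: when
   the sign bit of IN is set, a plain signed conversion would be wrong,
   so we convert (IN >> 1) | (IN & 1) instead (OR-ing in the low bit
   keeps the final rounding correct) and then double the result with
   f0 + f0 to restore the magnitude.  */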
39515 /* AVX512F does support 64-byte integer vector operations,
39516 thus the longest vector we are faced with is V64QImode. */
39517 #define MAX_VECT_LEN 64
39519 struct expand_vec_perm_d
39521 rtx target, op0, op1;
39522 unsigned char perm[MAX_VECT_LEN];
39523 enum machine_mode vmode;
39524 unsigned char nelt;
39525 bool one_operand_p;
39526 bool testing_p;
39529 static bool canonicalize_perm (struct expand_vec_perm_d *d);
39530 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
39531 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
39533 /* Get a vector mode of the same size as the original but with elements
39534 twice as wide. This is only guaranteed to apply to integral vectors. */
39536 static inline enum machine_mode
39537 get_mode_wider_vector (enum machine_mode o)
39539 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
39540 enum machine_mode n = GET_MODE_WIDER_MODE (o);
39541 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
39542 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
39543 return n;
39546 /* A subroutine of ix86_expand_vector_init_duplicate. Tries to
39547 fill target with val via vec_duplicate. */
39549 static bool
39550 ix86_vector_duplicate_value (enum machine_mode mode, rtx target, rtx val)
39552 bool ok;
39553 rtx insn, dup;
39555 /* First attempt to recognize VAL as-is. */
39556 dup = gen_rtx_VEC_DUPLICATE (mode, val);
39557 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
39558 if (recog_memoized (insn) < 0)
39560 rtx seq;
39561 /* If that fails, force VAL into a register. */
39563 start_sequence ();
39564 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
39565 seq = get_insns ();
39566 end_sequence ();
39567 if (seq)
39568 emit_insn_before (seq, insn);
39570 ok = recog_memoized (insn) >= 0;
39571 gcc_assert (ok);
39573 return true;
39576 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
39577 with all elements equal to VAR. Return true if successful. */
39579 static bool
39580 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
39581 rtx target, rtx val)
39583 bool ok;
39585 switch (mode)
39587 case V2SImode:
39588 case V2SFmode:
39589 if (!mmx_ok)
39590 return false;
39591 /* FALLTHRU */
39593 case V4DFmode:
39594 case V4DImode:
39595 case V8SFmode:
39596 case V8SImode:
39597 case V2DFmode:
39598 case V2DImode:
39599 case V4SFmode:
39600 case V4SImode:
39601 case V16SImode:
39602 case V8DImode:
39603 case V16SFmode:
39604 case V8DFmode:
39605 return ix86_vector_duplicate_value (mode, target, val);
39607 case V4HImode:
39608 if (!mmx_ok)
39609 return false;
39610 if (TARGET_SSE || TARGET_3DNOW_A)
39612 rtx x;
39614 val = gen_lowpart (SImode, val);
39615 x = gen_rtx_TRUNCATE (HImode, val);
39616 x = gen_rtx_VEC_DUPLICATE (mode, x);
39617 emit_insn (gen_rtx_SET (VOIDmode, target, x));
39618 return true;
39620 goto widen;
39622 case V8QImode:
39623 if (!mmx_ok)
39624 return false;
39625 goto widen;
39627 case V8HImode:
39628 if (TARGET_SSE2)
39630 struct expand_vec_perm_d dperm;
39631 rtx tmp1, tmp2;
39633 permute:
39634 memset (&dperm, 0, sizeof (dperm));
39635 dperm.target = target;
39636 dperm.vmode = mode;
39637 dperm.nelt = GET_MODE_NUNITS (mode);
39638 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
39639 dperm.one_operand_p = true;
39641 /* Extend to SImode using a paradoxical SUBREG. */
39642 tmp1 = gen_reg_rtx (SImode);
39643 emit_move_insn (tmp1, gen_lowpart (SImode, val));
39645 /* Insert the SImode value as low element of a V4SImode vector. */
39646 tmp2 = gen_reg_rtx (V4SImode);
39647 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
39648 emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
39650 ok = (expand_vec_perm_1 (&dperm)
39651 || expand_vec_perm_broadcast_1 (&dperm));
39652 gcc_assert (ok);
39653 return ok;
39655 goto widen;
39657 case V16QImode:
39658 if (TARGET_SSE2)
39659 goto permute;
39660 goto widen;
39662 widen:
39663 /* Replicate the value once into the next wider mode and recurse. */
39665 enum machine_mode smode, wsmode, wvmode;
39666 rtx x;
39668 smode = GET_MODE_INNER (mode);
39669 wvmode = get_mode_wider_vector (mode);
39670 wsmode = GET_MODE_INNER (wvmode);
39672 val = convert_modes (wsmode, smode, val, true);
39673 x = expand_simple_binop (wsmode, ASHIFT, val,
39674 GEN_INT (GET_MODE_BITSIZE (smode)),
39675 NULL_RTX, 1, OPTAB_LIB_WIDEN);
39676 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
39678 x = gen_reg_rtx (wvmode);
39679 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
39680 gcc_assert (ok);
39681 emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
39682 return ok;
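/* Sketch of the widening step above for V8QImode and an 8-bit value V:
   V is zero-extended to HImode, (V << 8) | V yields a 16-bit value with
   both bytes equal to V, the V4HImode broadcast of that value is built
   recursively, and the result is reinterpreted as the requested
   V8QImode vector.  */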
39685 case V16HImode:
39686 case V32QImode:
39688 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
39689 rtx x = gen_reg_rtx (hvmode);
39691 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
39692 gcc_assert (ok);
39694 x = gen_rtx_VEC_CONCAT (mode, x, x);
39695 emit_insn (gen_rtx_SET (VOIDmode, target, x));
39697 return true;
39699 default:
39700 return false;
39704 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
39705 whose ONE_VAR element is VAR, and other elements are zero. Return true
39706 if successful. */
39708 static bool
39709 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
39710 rtx target, rtx var, int one_var)
39712 enum machine_mode vsimode;
39713 rtx new_target;
39714 rtx x, tmp;
39715 bool use_vector_set = false;
39717 switch (mode)
39719 case V2DImode:
39720 /* For SSE4.1, we normally use vector set. But if the second
39721 element is zero and inter-unit moves are OK, we use movq
39722 instead. */
39723 use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
39724 && !(TARGET_INTER_UNIT_MOVES_TO_VEC
39725 && one_var == 0));
39726 break;
39727 case V16QImode:
39728 case V4SImode:
39729 case V4SFmode:
39730 use_vector_set = TARGET_SSE4_1;
39731 break;
39732 case V8HImode:
39733 use_vector_set = TARGET_SSE2;
39734 break;
39735 case V4HImode:
39736 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
39737 break;
39738 case V32QImode:
39739 case V16HImode:
39740 case V8SImode:
39741 case V8SFmode:
39742 case V4DFmode:
39743 use_vector_set = TARGET_AVX;
39744 break;
39745 case V4DImode:
39746 /* Use ix86_expand_vector_set in 64bit mode only. */
39747 use_vector_set = TARGET_AVX && TARGET_64BIT;
39748 break;
39749 default:
39750 break;
39753 if (use_vector_set)
39755 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
39756 var = force_reg (GET_MODE_INNER (mode), var);
39757 ix86_expand_vector_set (mmx_ok, target, var, one_var);
39758 return true;
39761 switch (mode)
39763 case V2SFmode:
39764 case V2SImode:
39765 if (!mmx_ok)
39766 return false;
39767 /* FALLTHRU */
39769 case V2DFmode:
39770 case V2DImode:
39771 if (one_var != 0)
39772 return false;
39773 var = force_reg (GET_MODE_INNER (mode), var);
39774 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
39775 emit_insn (gen_rtx_SET (VOIDmode, target, x));
39776 return true;
39778 case V4SFmode:
39779 case V4SImode:
39780 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
39781 new_target = gen_reg_rtx (mode);
39782 else
39783 new_target = target;
39784 var = force_reg (GET_MODE_INNER (mode), var);
39785 x = gen_rtx_VEC_DUPLICATE (mode, var);
39786 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
39787 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
39788 if (one_var != 0)
39790 /* We need to shuffle the value to the correct position, so
39791 create a new pseudo to store the intermediate result. */
39793 /* With SSE2, we can use the integer shuffle insns. */
39794 if (mode != V4SFmode && TARGET_SSE2)
39796 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
39797 const1_rtx,
39798 GEN_INT (one_var == 1 ? 0 : 1),
39799 GEN_INT (one_var == 2 ? 0 : 1),
39800 GEN_INT (one_var == 3 ? 0 : 1)));
39801 if (target != new_target)
39802 emit_move_insn (target, new_target);
39803 return true;
39806 /* Otherwise convert the intermediate result to V4SFmode and
39807 use the SSE1 shuffle instructions. */
39808 if (mode != V4SFmode)
39810 tmp = gen_reg_rtx (V4SFmode);
39811 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
39813 else
39814 tmp = new_target;
39816 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
39817 const1_rtx,
39818 GEN_INT (one_var == 1 ? 0 : 1),
39819 GEN_INT (one_var == 2 ? 0+4 : 1+4),
39820 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
39822 if (mode != V4SFmode)
39823 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
39824 else if (tmp != target)
39825 emit_move_insn (target, tmp);
39827 else if (target != new_target)
39828 emit_move_insn (target, new_target);
39829 return true;
39831 case V8HImode:
39832 case V16QImode:
39833 vsimode = V4SImode;
39834 goto widen;
39835 case V4HImode:
39836 case V8QImode:
39837 if (!mmx_ok)
39838 return false;
39839 vsimode = V2SImode;
39840 goto widen;
39841 widen:
39842 if (one_var != 0)
39843 return false;
39845 /* Zero extend the variable element to SImode and recurse. */
39846 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
39848 x = gen_reg_rtx (vsimode);
39849 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
39850 var, one_var))
39851 gcc_unreachable ();
39853 emit_move_insn (target, gen_lowpart (mode, x));
39854 return true;
39856 default:
39857 return false;
39861 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
39862 consisting of the values in VALS. It is known that all elements
39863 except ONE_VAR are constants. Return true if successful. */
39865 static bool
39866 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
39867 rtx target, rtx vals, int one_var)
39869 rtx var = XVECEXP (vals, 0, one_var);
39870 enum machine_mode wmode;
39871 rtx const_vec, x;
39873 const_vec = copy_rtx (vals);
39874 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
39875 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
39877 switch (mode)
39879 case V2DFmode:
39880 case V2DImode:
39881 case V2SFmode:
39882 case V2SImode:
39883 /* For the two-element vectors, it's just as easy to use
39884 the general case. */
39885 return false;
39887 case V4DImode:
39888 /* Use ix86_expand_vector_set in 64bit mode only. */
39889 if (!TARGET_64BIT)
39890 return false;
39891 case V4DFmode:
39892 case V8SFmode:
39893 case V8SImode:
39894 case V16HImode:
39895 case V32QImode:
39896 case V4SFmode:
39897 case V4SImode:
39898 case V8HImode:
39899 case V4HImode:
39900 break;
39902 case V16QImode:
39903 if (TARGET_SSE4_1)
39904 break;
39905 wmode = V8HImode;
39906 goto widen;
39907 case V8QImode:
39908 wmode = V4HImode;
39909 goto widen;
39910 widen:
39911 /* There's no way to set one QImode entry easily. Combine
39912 the variable value with its adjacent constant value, and
39913 promote to an HImode set. */
39914 x = XVECEXP (vals, 0, one_var ^ 1);
39915 if (one_var & 1)
39917 var = convert_modes (HImode, QImode, var, true);
39918 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
39919 NULL_RTX, 1, OPTAB_LIB_WIDEN);
39920 x = GEN_INT (INTVAL (x) & 0xff);
39922 else
39924 var = convert_modes (HImode, QImode, var, true);
39925 x = gen_int_mode (INTVAL (x) << 8, HImode);
39927 if (x != const0_rtx)
39928 var = expand_simple_binop (HImode, IOR, var, x, var,
39929 1, OPTAB_LIB_WIDEN);
39931 x = gen_reg_rtx (wmode);
39932 emit_move_insn (x, gen_lowpart (wmode, const_vec));
39933 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
39935 emit_move_insn (target, gen_lowpart (mode, x));
39936 return true;
39938 default:
39939 return false;
39942 emit_move_insn (target, const_vec);
39943 ix86_expand_vector_set (mmx_ok, target, var, one_var);
39944 return true;
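/* A minimal scalar sketch of the QImode pairing above (illustrative only,
   not part of the expander; assumes little-endian lane order): the variable
   byte VAR and its constant neighbour CST are fused into one 16-bit lane,
   which half depending on the element's parity.

     static unsigned short
     pack_pair (unsigned char var, unsigned char cst, int one_var)
     {
       if (one_var & 1)
         return (unsigned short) ((var << 8) | cst);   // variable byte high
       return (unsigned short) ((cst << 8) | var);     // variable byte low
     }
 */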
39947 /* A subroutine of ix86_expand_vector_init_general. Use vector
39948 concatenate to handle the most general case: all values variable,
39949 and none identical. */
39951 static void
39952 ix86_expand_vector_init_concat (enum machine_mode mode,
39953 rtx target, rtx *ops, int n)
39955 enum machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
39956 rtx first[16], second[8], third[4];
39957 rtvec v;
39958 int i, j;
39960 switch (n)
39962 case 2:
39963 switch (mode)
39965 case V16SImode:
39966 cmode = V8SImode;
39967 break;
39968 case V16SFmode:
39969 cmode = V8SFmode;
39970 break;
39971 case V8DImode:
39972 cmode = V4DImode;
39973 break;
39974 case V8DFmode:
39975 cmode = V4DFmode;
39976 break;
39977 case V8SImode:
39978 cmode = V4SImode;
39979 break;
39980 case V8SFmode:
39981 cmode = V4SFmode;
39982 break;
39983 case V4DImode:
39984 cmode = V2DImode;
39985 break;
39986 case V4DFmode:
39987 cmode = V2DFmode;
39988 break;
39989 case V4SImode:
39990 cmode = V2SImode;
39991 break;
39992 case V4SFmode:
39993 cmode = V2SFmode;
39994 break;
39995 case V2DImode:
39996 cmode = DImode;
39997 break;
39998 case V2SImode:
39999 cmode = SImode;
40000 break;
40001 case V2DFmode:
40002 cmode = DFmode;
40003 break;
40004 case V2SFmode:
40005 cmode = SFmode;
40006 break;
40007 default:
40008 gcc_unreachable ();
40011 if (!register_operand (ops[1], cmode))
40012 ops[1] = force_reg (cmode, ops[1]);
40013 if (!register_operand (ops[0], cmode))
40014 ops[0] = force_reg (cmode, ops[0]);
40015 emit_insn (gen_rtx_SET (VOIDmode, target,
40016 gen_rtx_VEC_CONCAT (mode, ops[0],
40017 ops[1])));
40018 break;
40020 case 4:
40021 switch (mode)
40023 case V4DImode:
40024 cmode = V2DImode;
40025 break;
40026 case V4DFmode:
40027 cmode = V2DFmode;
40028 break;
40029 case V4SImode:
40030 cmode = V2SImode;
40031 break;
40032 case V4SFmode:
40033 cmode = V2SFmode;
40034 break;
40035 default:
40036 gcc_unreachable ();
40038 goto half;
40040 case 8:
40041 switch (mode)
40043 case V8DImode:
40044 cmode = V2DImode;
40045 hmode = V4DImode;
40046 break;
40047 case V8DFmode:
40048 cmode = V2DFmode;
40049 hmode = V4DFmode;
40050 break;
40051 case V8SImode:
40052 cmode = V2SImode;
40053 hmode = V4SImode;
40054 break;
40055 case V8SFmode:
40056 cmode = V2SFmode;
40057 hmode = V4SFmode;
40058 break;
40059 default:
40060 gcc_unreachable ();
40062 goto half;
40064 case 16:
40065 switch (mode)
40067 case V16SImode:
40068 cmode = V2SImode;
40069 hmode = V4SImode;
40070 gmode = V8SImode;
40071 break;
40072 case V16SFmode:
40073 cmode = V2SFmode;
40074 hmode = V4SFmode;
40075 gmode = V8SFmode;
40076 break;
40077 default:
40078 gcc_unreachable ();
40080 goto half;
40082 half:
40083 /* FIXME: We process inputs backward to help RA. PR 36222. */
40084 i = n - 1;
40085 j = (n >> 1) - 1;
40086 for (; i > 0; i -= 2, j--)
40088 first[j] = gen_reg_rtx (cmode);
40089 v = gen_rtvec (2, ops[i - 1], ops[i]);
40090 ix86_expand_vector_init (false, first[j],
40091 gen_rtx_PARALLEL (cmode, v));
40094 n >>= 1;
40095 if (n > 4)
40097 gcc_assert (hmode != VOIDmode);
40098 gcc_assert (gmode != VOIDmode);
40099 for (i = j = 0; i < n; i += 2, j++)
40101 second[j] = gen_reg_rtx (hmode);
40102 ix86_expand_vector_init_concat (hmode, second [j],
40103 &first [i], 2);
40105 n >>= 1;
40106 for (i = j = 0; i < n; i += 2, j++)
40108 third[j] = gen_reg_rtx (gmode);
40109 ix86_expand_vector_init_concat (gmode, third[j],
40110 &second[i], 2);
40112 n >>= 1;
40113 ix86_expand_vector_init_concat (mode, target, third, n);
40115 else if (n > 2)
40117 gcc_assert (hmode != VOIDmode);
40118 for (i = j = 0; i < n; i += 2, j++)
40120 second[j] = gen_reg_rtx (hmode);
40121 ix86_expand_vector_init_concat (hmode, second [j],
40122 &first [i], 2);
40124 n >>= 1;
40125 ix86_expand_vector_init_concat (mode, target, second, n);
40127 else
40128 ix86_expand_vector_init_concat (mode, target, first, n);
40129 break;
40131 default:
40132 gcc_unreachable ();
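/* Scalar model of the pairwise strategy above (illustrative only; the RTL
   assembles the pairs backward to help register allocation, which does not
   change the value).  An 8-lane vector is built from 2-lane pieces, then
   4-lane pieces, then the final concatenation; <string.h> is assumed.

     static void
     concat8 (int dst[8], const int ops[8])
     {
       int first[4][2], second[2][4];
       for (int j = 0; j < 4; j++)                  // ops -> 2-lane pieces
         {
           first[j][0] = ops[2 * j];
           first[j][1] = ops[2 * j + 1];
         }
       for (int j = 0; j < 2; j++)                  // 2-lane -> 4-lane pieces
         {
           memcpy (&second[j][0], first[2 * j], sizeof first[0]);
           memcpy (&second[j][2], first[2 * j + 1], sizeof first[0]);
         }
       memcpy (&dst[0], second[0], sizeof second[0]);   // final concat
       memcpy (&dst[4], second[1], sizeof second[0]);
     }
 */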
40136 /* A subroutine of ix86_expand_vector_init_general. Use vector
40137 interleave to handle the most general case: all values variable,
40138 and none identical. */
40140 static void
40141 ix86_expand_vector_init_interleave (enum machine_mode mode,
40142 rtx target, rtx *ops, int n)
40144 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
40145 int i, j;
40146 rtx op0, op1;
40147 rtx (*gen_load_even) (rtx, rtx, rtx);
40148 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
40149 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
40151 switch (mode)
40153 case V8HImode:
40154 gen_load_even = gen_vec_setv8hi;
40155 gen_interleave_first_low = gen_vec_interleave_lowv4si;
40156 gen_interleave_second_low = gen_vec_interleave_lowv2di;
40157 inner_mode = HImode;
40158 first_imode = V4SImode;
40159 second_imode = V2DImode;
40160 third_imode = VOIDmode;
40161 break;
40162 case V16QImode:
40163 gen_load_even = gen_vec_setv16qi;
40164 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
40165 gen_interleave_second_low = gen_vec_interleave_lowv4si;
40166 inner_mode = QImode;
40167 first_imode = V8HImode;
40168 second_imode = V4SImode;
40169 third_imode = V2DImode;
40170 break;
40171 default:
40172 gcc_unreachable ();
40175 for (i = 0; i < n; i++)
40177 /* Extend the odd element to SImode using a paradoxical SUBREG. */
40178 op0 = gen_reg_rtx (SImode);
40179 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
40181 /* Insert the SImode value as low element of V4SImode vector. */
40182 op1 = gen_reg_rtx (V4SImode);
40183 op0 = gen_rtx_VEC_MERGE (V4SImode,
40184 gen_rtx_VEC_DUPLICATE (V4SImode,
40185 op0),
40186 CONST0_RTX (V4SImode),
40187 const1_rtx);
40188 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
40190 /* Cast the V4SImode vector back to a vector in the original mode. */
40191 op0 = gen_reg_rtx (mode);
40192 emit_move_insn (op0, gen_lowpart (mode, op1));
40194 /* Load even elements into the second position. */
40195 emit_insn (gen_load_even (op0,
40196 force_reg (inner_mode,
40197 ops [i + i + 1]),
40198 const1_rtx));
40200 /* Cast vector to FIRST_IMODE vector. */
40201 ops[i] = gen_reg_rtx (first_imode);
40202 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
40205 /* Interleave low FIRST_IMODE vectors. */
40206 for (i = j = 0; i < n; i += 2, j++)
40208 op0 = gen_reg_rtx (first_imode);
40209 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
40211 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
40212 ops[j] = gen_reg_rtx (second_imode);
40213 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
40216 /* Interleave low SECOND_IMODE vectors. */
40217 switch (second_imode)
40219 case V4SImode:
40220 for (i = j = 0; i < n / 2; i += 2, j++)
40222 op0 = gen_reg_rtx (second_imode);
40223 emit_insn (gen_interleave_second_low (op0, ops[i],
40224 ops[i + 1]));
40226 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
40227 vector. */
40228 ops[j] = gen_reg_rtx (third_imode);
40229 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
40231 second_imode = V2DImode;
40232 gen_interleave_second_low = gen_vec_interleave_lowv2di;
40233 /* FALLTHRU */
40235 case V2DImode:
40236 op0 = gen_reg_rtx (second_imode);
40237 emit_insn (gen_interleave_second_low (op0, ops[0],
40238 ops[1]));
40240 /* Cast the SECOND_IMODE vector back to a vector in the original
40241 mode. */
40242 emit_insn (gen_rtx_SET (VOIDmode, target,
40243 gen_lowpart (mode, op0)));
40244 break;
40246 default:
40247 gcc_unreachable ();
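/* The "interleave low" building block used above, in scalar form
   (illustrative only): for two 4-lane vectors it merges the low halves
   element by element and discards the high halves, which is what the
   punpckl{bw,wd,dq} instructions do.

     static void
     interleave_low4 (int dst[4], const int a[4], const int b[4])
     {
       dst[0] = a[0]; dst[1] = b[0];   // lane pairs from the low halves
       dst[2] = a[1]; dst[3] = b[1];   // a[2..3] and b[2..3] are dropped
     }
 */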
40251 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
40252 all values variable, and none identical. */
40254 static void
40255 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
40256 rtx target, rtx vals)
40258 rtx ops[64], op0, op1;
40259 enum machine_mode half_mode = VOIDmode;
40260 int n, i;
40262 switch (mode)
40264 case V2SFmode:
40265 case V2SImode:
40266 if (!mmx_ok && !TARGET_SSE)
40267 break;
40268 /* FALLTHRU */
40270 case V16SImode:
40271 case V16SFmode:
40272 case V8DFmode:
40273 case V8DImode:
40274 case V8SFmode:
40275 case V8SImode:
40276 case V4DFmode:
40277 case V4DImode:
40278 case V4SFmode:
40279 case V4SImode:
40280 case V2DFmode:
40281 case V2DImode:
40282 n = GET_MODE_NUNITS (mode);
40283 for (i = 0; i < n; i++)
40284 ops[i] = XVECEXP (vals, 0, i);
40285 ix86_expand_vector_init_concat (mode, target, ops, n);
40286 return;
40288 case V32QImode:
40289 half_mode = V16QImode;
40290 goto half;
40292 case V16HImode:
40293 half_mode = V8HImode;
40294 goto half;
40296 half:
40297 n = GET_MODE_NUNITS (mode);
40298 for (i = 0; i < n; i++)
40299 ops[i] = XVECEXP (vals, 0, i);
40300 op0 = gen_reg_rtx (half_mode);
40301 op1 = gen_reg_rtx (half_mode);
40302 ix86_expand_vector_init_interleave (half_mode, op0, ops,
40303 n >> 2);
40304 ix86_expand_vector_init_interleave (half_mode, op1,
40305 &ops [n >> 1], n >> 2);
40306 emit_insn (gen_rtx_SET (VOIDmode, target,
40307 gen_rtx_VEC_CONCAT (mode, op0, op1)));
40308 return;
40310 case V16QImode:
40311 if (!TARGET_SSE4_1)
40312 break;
40313 /* FALLTHRU */
40315 case V8HImode:
40316 if (!TARGET_SSE2)
40317 break;
40319 /* Don't use ix86_expand_vector_init_interleave if we can't
40320 move from GPR to SSE register directly. */
40321 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
40322 break;
40324 n = GET_MODE_NUNITS (mode);
40325 for (i = 0; i < n; i++)
40326 ops[i] = XVECEXP (vals, 0, i);
40327 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
40328 return;
40330 case V4HImode:
40331 case V8QImode:
40332 break;
40334 default:
40335 gcc_unreachable ();
40339 int i, j, n_elts, n_words, n_elt_per_word;
40340 enum machine_mode inner_mode;
40341 rtx words[4], shift;
40343 inner_mode = GET_MODE_INNER (mode);
40344 n_elts = GET_MODE_NUNITS (mode);
40345 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
40346 n_elt_per_word = n_elts / n_words;
40347 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
40349 for (i = 0; i < n_words; ++i)
40351 rtx word = NULL_RTX;
40353 for (j = 0; j < n_elt_per_word; ++j)
40355 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
40356 elt = convert_modes (word_mode, inner_mode, elt, true);
40358 if (j == 0)
40359 word = elt;
40360 else
40362 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
40363 word, 1, OPTAB_LIB_WIDEN);
40364 word = expand_simple_binop (word_mode, IOR, word, elt,
40365 word, 1, OPTAB_LIB_WIDEN);
40369 words[i] = word;
40372 if (n_words == 1)
40373 emit_move_insn (target, gen_lowpart (mode, words[0]));
40374 else if (n_words == 2)
40376 rtx tmp = gen_reg_rtx (mode);
40377 emit_clobber (tmp);
40378 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
40379 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
40380 emit_move_insn (target, tmp);
40382 else if (n_words == 4)
40384 rtx tmp = gen_reg_rtx (V4SImode);
40385 gcc_assert (word_mode == SImode);
40386 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
40387 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
40388 emit_move_insn (target, gen_lowpart (mode, tmp));
40390 else
40391 gcc_unreachable ();
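/* Scalar model of the word-building loop above (illustrative only; assumes
   a 32-bit word and 16-bit elements): elements are consumed from the
   high-numbered end of the word downwards, so the final shift/IOR sequence
   leaves the lowest-indexed element in the least significant bits, matching
   little-endian lane order.

     static unsigned int
     pack_word (const unsigned short elt[2])
     {
       unsigned int word = elt[1];        // highest-indexed element first
       word = (word << 16) | elt[0];      // shift up, OR in the next one
       return word;
     }
 */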
40395 /* Initialize vector TARGET via VALS. Suppress the use of MMX
40396 instructions unless MMX_OK is true. */
40398 void
40399 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
40401 enum machine_mode mode = GET_MODE (target);
40402 enum machine_mode inner_mode = GET_MODE_INNER (mode);
40403 int n_elts = GET_MODE_NUNITS (mode);
40404 int n_var = 0, one_var = -1;
40405 bool all_same = true, all_const_zero = true;
40406 int i;
40407 rtx x;
40409 for (i = 0; i < n_elts; ++i)
40411 x = XVECEXP (vals, 0, i);
40412 if (!(CONST_INT_P (x)
40413 || GET_CODE (x) == CONST_DOUBLE
40414 || GET_CODE (x) == CONST_FIXED))
40415 n_var++, one_var = i;
40416 else if (x != CONST0_RTX (inner_mode))
40417 all_const_zero = false;
40418 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
40419 all_same = false;
40422 /* Constants are best loaded from the constant pool. */
40423 if (n_var == 0)
40425 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
40426 return;
40429 /* If all values are identical, broadcast the value. */
40430 if (all_same
40431 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
40432 XVECEXP (vals, 0, 0)))
40433 return;
40435 /* Values where only one field is non-constant are best loaded from
40436 the pool and overwritten via move later. */
40437 if (n_var == 1)
40439 if (all_const_zero
40440 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
40441 XVECEXP (vals, 0, one_var),
40442 one_var))
40443 return;
40445 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
40446 return;
40449 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
40452 void
40453 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
40455 enum machine_mode mode = GET_MODE (target);
40456 enum machine_mode inner_mode = GET_MODE_INNER (mode);
40457 enum machine_mode half_mode;
40458 bool use_vec_merge = false;
40459 rtx tmp;
40460 static rtx (*gen_extract[6][2]) (rtx, rtx)
40462 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
40463 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
40464 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
40465 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
40466 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
40467 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
40469 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
40471 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
40472 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
40473 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
40474 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
40475 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
40476 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
40478 int i, j, n;
40480 switch (mode)
40482 case V2SFmode:
40483 case V2SImode:
40484 if (mmx_ok)
40486 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
40487 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
40488 if (elt == 0)
40489 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
40490 else
40491 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
40492 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40493 return;
40495 break;
40497 case V2DImode:
40498 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
40499 if (use_vec_merge)
40500 break;
40502 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
40503 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
40504 if (elt == 0)
40505 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
40506 else
40507 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
40508 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40509 return;
40511 case V2DFmode:
40513 rtx op0, op1;
40515 /* For the two element vectors, we implement a VEC_CONCAT with
40516 the extraction of the other element. */
40518 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
40519 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
40521 if (elt == 0)
40522 op0 = val, op1 = tmp;
40523 else
40524 op0 = tmp, op1 = val;
40526 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
40527 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40529 return;
40531 case V4SFmode:
40532 use_vec_merge = TARGET_SSE4_1;
40533 if (use_vec_merge)
40534 break;
40536 switch (elt)
40538 case 0:
40539 use_vec_merge = true;
40540 break;
40542 case 1:
40543 /* tmp = target = A B C D */
40544 tmp = copy_to_reg (target);
40545 /* target = A A B B */
40546 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
40547 /* target = X A B B */
40548 ix86_expand_vector_set (false, target, val, 0);
40549 /* target = A X C D */
40550 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40551 const1_rtx, const0_rtx,
40552 GEN_INT (2+4), GEN_INT (3+4)));
40553 return;
40555 case 2:
40556 /* tmp = target = A B C D */
40557 tmp = copy_to_reg (target);
40558 /* tmp = X B C D */
40559 ix86_expand_vector_set (false, tmp, val, 0);
40560 /* target = A B X D */
40561 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40562 const0_rtx, const1_rtx,
40563 GEN_INT (0+4), GEN_INT (3+4)));
40564 return;
40566 case 3:
40567 /* tmp = target = A B C D */
40568 tmp = copy_to_reg (target);
40569 /* tmp = X B C D */
40570 ix86_expand_vector_set (false, tmp, val, 0);
40571 /* target = A B C X */
40572 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40573 const0_rtx, const1_rtx,
40574 GEN_INT (2+4), GEN_INT (0+4)));
40575 return;
40577 default:
40578 gcc_unreachable ();
40580 break;
40582 case V4SImode:
40583 use_vec_merge = TARGET_SSE4_1;
40584 if (use_vec_merge)
40585 break;
40587 /* Element 0 handled by vec_merge below. */
40588 if (elt == 0)
40590 use_vec_merge = true;
40591 break;
40594 if (TARGET_SSE2)
40596 /* With SSE2, use integer shuffles to swap element 0 and ELT,
40597 store into element 0, then shuffle them back. */
40599 rtx order[4];
40601 order[0] = GEN_INT (elt);
40602 order[1] = const1_rtx;
40603 order[2] = const2_rtx;
40604 order[3] = GEN_INT (3);
40605 order[elt] = const0_rtx;
40607 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
40608 order[1], order[2], order[3]));
40610 ix86_expand_vector_set (false, target, val, 0);
40612 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
40613 order[1], order[2], order[3]));
40615 else
40617 /* For SSE1, we have to reuse the V4SF code. */
40618 rtx t = gen_reg_rtx (V4SFmode);
40619 ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
40620 emit_move_insn (target, gen_lowpart (mode, t));
40622 return;
40624 case V8HImode:
40625 use_vec_merge = TARGET_SSE2;
40626 break;
40627 case V4HImode:
40628 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
40629 break;
40631 case V16QImode:
40632 use_vec_merge = TARGET_SSE4_1;
40633 break;
40635 case V8QImode:
40636 break;
40638 case V32QImode:
40639 half_mode = V16QImode;
40640 j = 0;
40641 n = 16;
40642 goto half;
40644 case V16HImode:
40645 half_mode = V8HImode;
40646 j = 1;
40647 n = 8;
40648 goto half;
40650 case V8SImode:
40651 half_mode = V4SImode;
40652 j = 2;
40653 n = 4;
40654 goto half;
40656 case V4DImode:
40657 half_mode = V2DImode;
40658 j = 3;
40659 n = 2;
40660 goto half;
40662 case V8SFmode:
40663 half_mode = V4SFmode;
40664 j = 4;
40665 n = 4;
40666 goto half;
40668 case V4DFmode:
40669 half_mode = V2DFmode;
40670 j = 5;
40671 n = 2;
40672 goto half;
40674 half:
40675 /* Compute offset. */
40676 i = elt / n;
40677 elt %= n;
40679 gcc_assert (i <= 1);
40681 /* Extract the half. */
40682 tmp = gen_reg_rtx (half_mode);
40683 emit_insn (gen_extract[j][i] (tmp, target));
40685 /* Put val in tmp at elt. */
40686 ix86_expand_vector_set (false, tmp, val, elt);
40688 /* Put it back. */
40689 emit_insn (gen_insert[j][i] (target, target, tmp));
40690 return;
40692 default:
40693 break;
40696 if (use_vec_merge)
40698 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
40699 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
40700 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40702 else
40704 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
40706 emit_move_insn (mem, target);
40708 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
40709 emit_move_insn (tmp, val);
40711 emit_move_insn (target, mem);
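/* Semantics of the VEC_MERGE form emitted above when use_vec_merge is set,
   as a scalar sketch (illustrative only): VAL is broadcast and then selected
   only in lane ELT; every other lane keeps its old value.

     static void
     vec_merge_set4 (float dst[4], const float src[4], float val, int elt)
     {
       for (int i = 0; i < 4; i++)
         dst[i] = (i == elt) ? val : src[i];   // merge mask is 1 << elt
     }
 */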
40715 void
40716 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
40718 enum machine_mode mode = GET_MODE (vec);
40719 enum machine_mode inner_mode = GET_MODE_INNER (mode);
40720 bool use_vec_extr = false;
40721 rtx tmp;
40723 switch (mode)
40725 case V2SImode:
40726 case V2SFmode:
40727 if (!mmx_ok)
40728 break;
40729 /* FALLTHRU */
40731 case V2DFmode:
40732 case V2DImode:
40733 use_vec_extr = true;
40734 break;
40736 case V4SFmode:
40737 use_vec_extr = TARGET_SSE4_1;
40738 if (use_vec_extr)
40739 break;
40741 switch (elt)
40743 case 0:
40744 tmp = vec;
40745 break;
40747 case 1:
40748 case 3:
40749 tmp = gen_reg_rtx (mode);
40750 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
40751 GEN_INT (elt), GEN_INT (elt),
40752 GEN_INT (elt+4), GEN_INT (elt+4)));
40753 break;
40755 case 2:
40756 tmp = gen_reg_rtx (mode);
40757 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
40758 break;
40760 default:
40761 gcc_unreachable ();
40763 vec = tmp;
40764 use_vec_extr = true;
40765 elt = 0;
40766 break;
40768 case V4SImode:
40769 use_vec_extr = TARGET_SSE4_1;
40770 if (use_vec_extr)
40771 break;
40773 if (TARGET_SSE2)
40775 switch (elt)
40777 case 0:
40778 tmp = vec;
40779 break;
40781 case 1:
40782 case 3:
40783 tmp = gen_reg_rtx (mode);
40784 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
40785 GEN_INT (elt), GEN_INT (elt),
40786 GEN_INT (elt), GEN_INT (elt)));
40787 break;
40789 case 2:
40790 tmp = gen_reg_rtx (mode);
40791 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
40792 break;
40794 default:
40795 gcc_unreachable ();
40797 vec = tmp;
40798 use_vec_extr = true;
40799 elt = 0;
40801 else
40803 /* For SSE1, we have to reuse the V4SF code. */
40804 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
40805 gen_lowpart (V4SFmode, vec), elt);
40806 return;
40808 break;
40810 case V8HImode:
40811 use_vec_extr = TARGET_SSE2;
40812 break;
40813 case V4HImode:
40814 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
40815 break;
40817 case V16QImode:
40818 use_vec_extr = TARGET_SSE4_1;
40819 break;
40821 case V8SFmode:
40822 if (TARGET_AVX)
40824 tmp = gen_reg_rtx (V4SFmode);
40825 if (elt < 4)
40826 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
40827 else
40828 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
40829 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40830 return;
40832 break;
40834 case V4DFmode:
40835 if (TARGET_AVX)
40837 tmp = gen_reg_rtx (V2DFmode);
40838 if (elt < 2)
40839 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
40840 else
40841 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
40842 ix86_expand_vector_extract (false, target, tmp, elt & 1);
40843 return;
40845 break;
40847 case V32QImode:
40848 if (TARGET_AVX)
40850 tmp = gen_reg_rtx (V16QImode);
40851 if (elt < 16)
40852 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
40853 else
40854 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
40855 ix86_expand_vector_extract (false, target, tmp, elt & 15);
40856 return;
40858 break;
40860 case V16HImode:
40861 if (TARGET_AVX)
40863 tmp = gen_reg_rtx (V8HImode);
40864 if (elt < 8)
40865 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
40866 else
40867 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
40868 ix86_expand_vector_extract (false, target, tmp, elt & 7);
40869 return;
40871 break;
40873 case V8SImode:
40874 if (TARGET_AVX)
40876 tmp = gen_reg_rtx (V4SImode);
40877 if (elt < 4)
40878 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
40879 else
40880 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
40881 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40882 return;
40884 break;
40886 case V4DImode:
40887 if (TARGET_AVX)
40889 tmp = gen_reg_rtx (V2DImode);
40890 if (elt < 2)
40891 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
40892 else
40893 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
40894 ix86_expand_vector_extract (false, target, tmp, elt & 1);
40895 return;
40897 break;
40899 case V16SFmode:
40900 tmp = gen_reg_rtx (V8SFmode);
40901 if (elt < 8)
40902 emit_insn (gen_vec_extract_lo_v16sf (tmp, vec));
40903 else
40904 emit_insn (gen_vec_extract_hi_v16sf (tmp, vec));
40905 ix86_expand_vector_extract (false, target, tmp, elt & 7);
40906 return;
40908 case V8DFmode:
40909 tmp = gen_reg_rtx (V4DFmode);
40910 if (elt < 4)
40911 emit_insn (gen_vec_extract_lo_v8df (tmp, vec));
40912 else
40913 emit_insn (gen_vec_extract_hi_v8df (tmp, vec));
40914 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40915 return;
40917 case V16SImode:
40918 tmp = gen_reg_rtx (V8SImode);
40919 if (elt < 8)
40920 emit_insn (gen_vec_extract_lo_v16si (tmp, vec));
40921 else
40922 emit_insn (gen_vec_extract_hi_v16si (tmp, vec));
40923 ix86_expand_vector_extract (false, target, tmp, elt & 7);
40924 return;
40926 case V8DImode:
40927 tmp = gen_reg_rtx (V4DImode);
40928 if (elt < 4)
40929 emit_insn (gen_vec_extract_lo_v8di (tmp, vec));
40930 else
40931 emit_insn (gen_vec_extract_hi_v8di (tmp, vec));
40932 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40933 return;
40935 case V8QImode:
40936 /* ??? Could extract the appropriate HImode element and shift. */
40937 default:
40938 break;
40941 if (use_vec_extr)
40943 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
40944 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
40946 /* Let the rtl optimizers know about the zero extension performed. */
40947 if (inner_mode == QImode || inner_mode == HImode)
40949 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
40950 target = gen_lowpart (SImode, target);
40953 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40955 else
40957 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
40959 emit_move_insn (mem, vec);
40961 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
40962 emit_move_insn (target, tmp);
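/* The 256-bit and 512-bit cases above reduce to a narrower extract: pick the
   half that contains the requested lane, then index within it.  Scalar
   sketch for 8 x float (illustrative only):

     static float
     extract_v8sf (const float vec[8], int elt)
     {
       const float *half = (elt < 4) ? vec : vec + 4;   // lo/hi 128 bits
       return half[elt & 3];                            // lane in the half
     }
 */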
40966 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
40967 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
40968 The upper bits of DEST are undefined, though they shouldn't cause
40969 exceptions (some bits from src or all zeros are ok). */
40971 static void
40972 emit_reduc_half (rtx dest, rtx src, int i)
40974 rtx tem, d = dest;
40975 switch (GET_MODE (src))
40977 case V4SFmode:
40978 if (i == 128)
40979 tem = gen_sse_movhlps (dest, src, src);
40980 else
40981 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
40982 GEN_INT (1 + 4), GEN_INT (1 + 4));
40983 break;
40984 case V2DFmode:
40985 tem = gen_vec_interleave_highv2df (dest, src, src);
40986 break;
40987 case V16QImode:
40988 case V8HImode:
40989 case V4SImode:
40990 case V2DImode:
40991 d = gen_reg_rtx (V1TImode);
40992 tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
40993 GEN_INT (i / 2));
40994 break;
40995 case V8SFmode:
40996 if (i == 256)
40997 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
40998 else
40999 tem = gen_avx_shufps256 (dest, src, src,
41000 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
41001 break;
41002 case V4DFmode:
41003 if (i == 256)
41004 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
41005 else
41006 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
41007 break;
41008 case V32QImode:
41009 case V16HImode:
41010 case V8SImode:
41011 case V4DImode:
41012 if (i == 256)
41014 if (GET_MODE (dest) != V4DImode)
41015 d = gen_reg_rtx (V4DImode);
41016 tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src),
41017 gen_lowpart (V4DImode, src),
41018 const1_rtx);
41020 else
41022 d = gen_reg_rtx (V2TImode);
41023 tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src),
41024 GEN_INT (i / 2));
41026 break;
41027 case V16SImode:
41028 case V16SFmode:
41029 case V8DImode:
41030 case V8DFmode:
41031 if (i > 128)
41032 tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
41033 gen_lowpart (V16SImode, src),
41034 gen_lowpart (V16SImode, src),
41035 GEN_INT (0x4 + (i == 512 ? 4 : 0)),
41036 GEN_INT (0x5 + (i == 512 ? 4 : 0)),
41037 GEN_INT (0x6 + (i == 512 ? 4 : 0)),
41038 GEN_INT (0x7 + (i == 512 ? 4 : 0)),
41039 GEN_INT (0xC), GEN_INT (0xD),
41040 GEN_INT (0xE), GEN_INT (0xF),
41041 GEN_INT (0x10), GEN_INT (0x11),
41042 GEN_INT (0x12), GEN_INT (0x13),
41043 GEN_INT (0x14), GEN_INT (0x15),
41044 GEN_INT (0x16), GEN_INT (0x17));
41045 else
41046 tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
41047 gen_lowpart (V16SImode, src),
41048 GEN_INT (i == 128 ? 0x2 : 0x1),
41049 GEN_INT (0x3),
41050 GEN_INT (0x3),
41051 GEN_INT (0x3),
41052 GEN_INT (i == 128 ? 0x6 : 0x5),
41053 GEN_INT (0x7),
41054 GEN_INT (0x7),
41055 GEN_INT (0x7),
41056 GEN_INT (i == 128 ? 0xA : 0x9),
41057 GEN_INT (0xB),
41058 GEN_INT (0xB),
41059 GEN_INT (0xB),
41060 GEN_INT (i == 128 ? 0xE : 0xD),
41061 GEN_INT (0xF),
41062 GEN_INT (0xF),
41063 GEN_INT (0xF));
41064 break;
41065 default:
41066 gcc_unreachable ();
41068 emit_insn (tem);
41069 if (d != dest)
41070 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
41073 /* Expand a vector reduction. FN is the binary pattern to reduce;
41074 DEST is the destination; IN is the input vector. */
41076 void
41077 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
41079 rtx half, dst, vec = in;
41080 enum machine_mode mode = GET_MODE (in);
41081 int i;
41083 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
41084 if (TARGET_SSE4_1
41085 && mode == V8HImode
41086 && fn == gen_uminv8hi3)
41088 emit_insn (gen_sse4_1_phminposuw (dest, in));
41089 return;
41092 for (i = GET_MODE_BITSIZE (mode);
41093 i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
41094 i >>= 1)
41096 half = gen_reg_rtx (mode);
41097 emit_reduc_half (half, vec, i);
41098 if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
41099 dst = dest;
41100 else
41101 dst = gen_reg_rtx (mode);
41102 emit_insn (fn (dst, half, vec));
41103 vec = dst;
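/* Shape of the reduction loop above, as a scalar sketch for a 4-lane integer
   minimum (illustrative only): each step brings the upper half of the live
   lanes down and combines lane-wise, halving the number of live lanes until
   lane 0 holds the result.

     static int
     reduce_min4 (const int v[4])
     {
       int a[4] = { v[0], v[1], v[2], v[3] };
       for (int width = 4; width > 1; width /= 2)       // 4 -> 2 -> 1 lanes
         for (int i = 0; i < width / 2; i++)
           {
             int hi = a[i + width / 2];                 // emit_reduc_half
             a[i] = a[i] < hi ? a[i] : hi;              // fn (dst, half, vec)
           }
       return a[0];
     }
 */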
41107 /* Target hook for scalar_mode_supported_p. */
41108 static bool
41109 ix86_scalar_mode_supported_p (enum machine_mode mode)
41111 if (DECIMAL_FLOAT_MODE_P (mode))
41112 return default_decimal_float_supported_p ();
41113 else if (mode == TFmode)
41114 return true;
41115 else
41116 return default_scalar_mode_supported_p (mode);
41119 /* Implements target hook vector_mode_supported_p. */
41120 static bool
41121 ix86_vector_mode_supported_p (enum machine_mode mode)
41123 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
41124 return true;
41125 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
41126 return true;
41127 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
41128 return true;
41129 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
41130 return true;
41131 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
41132 return true;
41133 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
41134 return true;
41135 return false;
41138 /* Target hook for c_mode_for_suffix. */
41139 static enum machine_mode
41140 ix86_c_mode_for_suffix (char suffix)
41142 if (suffix == 'q')
41143 return TFmode;
41144 if (suffix == 'w')
41145 return XFmode;
41147 return VOIDmode;
41150 /* Worker function for TARGET_MD_ASM_CLOBBERS.
41152 We do this in the new i386 backend to maintain source compatibility
41153 with the old cc0-based compiler. */
41155 static tree
41156 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
41157 tree inputs ATTRIBUTE_UNUSED,
41158 tree clobbers)
41160 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
41161 clobbers);
41162 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
41163 clobbers);
41164 return clobbers;
41167 /* Implements target vector targetm.asm.encode_section_info. */
41169 static void ATTRIBUTE_UNUSED
41170 ix86_encode_section_info (tree decl, rtx rtl, int first)
41172 default_encode_section_info (decl, rtl, first);
41174 if (TREE_CODE (decl) == VAR_DECL
41175 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
41176 && ix86_in_large_data_p (decl))
41177 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
41180 /* Worker function for REVERSE_CONDITION. */
41182 enum rtx_code
41183 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
41185 return (mode != CCFPmode && mode != CCFPUmode
41186 ? reverse_condition (code)
41187 : reverse_condition_maybe_unordered (code));
41190 /* Output code to perform an x87 FP register move, from OPERANDS[1]
41191 to OPERANDS[0]. */
41193 const char *
41194 output_387_reg_move (rtx insn, rtx *operands)
41196 if (REG_P (operands[0]))
41198 if (REG_P (operands[1])
41199 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
41201 if (REGNO (operands[0]) == FIRST_STACK_REG)
41202 return output_387_ffreep (operands, 0);
41203 return "fstp\t%y0";
41205 if (STACK_TOP_P (operands[0]))
41206 return "fld%Z1\t%y1";
41207 return "fst\t%y0";
41209 else if (MEM_P (operands[0]))
41211 gcc_assert (REG_P (operands[1]));
41212 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
41213 return "fstp%Z0\t%y0";
41214 else
41216 /* There is no non-popping store to memory for XFmode.
41217 So if we need one, follow the store with a load. */
41218 if (GET_MODE (operands[0]) == XFmode)
41219 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
41220 else
41221 return "fst%Z0\t%y0";
41224 else
41225 gcc_unreachable();
41228 /* Output code to perform a conditional jump to LABEL if the C2 flag in
41229 the FP status register is set. */
41231 void
41232 ix86_emit_fp_unordered_jump (rtx label)
41234 rtx reg = gen_reg_rtx (HImode);
41235 rtx temp;
41237 emit_insn (gen_x86_fnstsw_1 (reg));
41239 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
41241 emit_insn (gen_x86_sahf_1 (reg));
41243 temp = gen_rtx_REG (CCmode, FLAGS_REG);
41244 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
41246 else
41248 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
41250 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
41251 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
41254 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
41255 gen_rtx_LABEL_REF (VOIDmode, label),
41256 pc_rtx);
41257 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
41259 emit_jump_insn (temp);
41260 predict_jump (REG_BR_PROB_BASE * 10 / 100);
41263 /* Output code to perform a log1p XFmode calculation. */
41265 void ix86_emit_i387_log1p (rtx op0, rtx op1)
41267 rtx label1 = gen_label_rtx ();
41268 rtx label2 = gen_label_rtx ();
41270 rtx tmp = gen_reg_rtx (XFmode);
41271 rtx tmp2 = gen_reg_rtx (XFmode);
41272 rtx test;
41274 emit_insn (gen_absxf2 (tmp, op1));
41275 test = gen_rtx_GE (VOIDmode, tmp,
41276 CONST_DOUBLE_FROM_REAL_VALUE (
41277 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
41278 XFmode));
41279 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
41281 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
41282 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
41283 emit_jump (label2);
41285 emit_label (label1);
41286 emit_move_insn (tmp, CONST1_RTX (XFmode));
41287 emit_insn (gen_addxf3 (tmp, op1, tmp));
41288 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
41289 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
41291 emit_label (label2);
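/* Scalar sketch of the split above (illustrative only; fyl2xp1 () and
   fyl2x () are hypothetical stand-ins for the x87 instructions of the same
   name, computing y * log2 (x + 1) and y * log2 (x), and <math.h> is
   assumed).  fyl2xp1 is only specified for |x| < 1 - sqrt(2)/2, hence the
   threshold; ln(2) plays the role of the fldln2 constant loaded above.

     static double
     log1p_sketch (double x)
     {
       const double thresh = 0.29289321881345247;   // 1 - sqrt(2)/2
       if (fabs (x) < thresh)
         return fyl2xp1 (M_LN2, x);          // small |x|: no cancellation
       return fyl2x (M_LN2, 1.0 + x);        // otherwise add 1 explicitly
     }
 */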
41294 /* Emit x87 code for rounding to the nearest integer, halfway cases away from zero. */
41295 void ix86_emit_i387_round (rtx op0, rtx op1)
41297 enum machine_mode inmode = GET_MODE (op1);
41298 enum machine_mode outmode = GET_MODE (op0);
41299 rtx e1, e2, res, tmp, tmp1, half;
41300 rtx scratch = gen_reg_rtx (HImode);
41301 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
41302 rtx jump_label = gen_label_rtx ();
41303 rtx insn;
41304 rtx (*gen_abs) (rtx, rtx);
41305 rtx (*gen_neg) (rtx, rtx);
41307 switch (inmode)
41309 case SFmode:
41310 gen_abs = gen_abssf2;
41311 break;
41312 case DFmode:
41313 gen_abs = gen_absdf2;
41314 break;
41315 case XFmode:
41316 gen_abs = gen_absxf2;
41317 break;
41318 default:
41319 gcc_unreachable ();
41322 switch (outmode)
41324 case SFmode:
41325 gen_neg = gen_negsf2;
41326 break;
41327 case DFmode:
41328 gen_neg = gen_negdf2;
41329 break;
41330 case XFmode:
41331 gen_neg = gen_negxf2;
41332 break;
41333 case HImode:
41334 gen_neg = gen_neghi2;
41335 break;
41336 case SImode:
41337 gen_neg = gen_negsi2;
41338 break;
41339 case DImode:
41340 gen_neg = gen_negdi2;
41341 break;
41342 default:
41343 gcc_unreachable ();
41346 e1 = gen_reg_rtx (inmode);
41347 e2 = gen_reg_rtx (inmode);
41348 res = gen_reg_rtx (outmode);
41350 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);
41352 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
41354 /* scratch = fxam(op1) */
41355 emit_insn (gen_rtx_SET (VOIDmode, scratch,
41356 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
41357 UNSPEC_FXAM)));
41358 /* e1 = fabs(op1) */
41359 emit_insn (gen_abs (e1, op1));
41361 /* e2 = e1 + 0.5 */
41362 half = force_reg (inmode, half);
41363 emit_insn (gen_rtx_SET (VOIDmode, e2,
41364 gen_rtx_PLUS (inmode, e1, half)));
41366 /* res = floor(e2) */
41367 if (inmode != XFmode)
41369 tmp1 = gen_reg_rtx (XFmode);
41371 emit_insn (gen_rtx_SET (VOIDmode, tmp1,
41372 gen_rtx_FLOAT_EXTEND (XFmode, e2)));
41374 else
41375 tmp1 = e2;
41377 switch (outmode)
41379 case SFmode:
41380 case DFmode:
41382 rtx tmp0 = gen_reg_rtx (XFmode);
41384 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
41386 emit_insn (gen_rtx_SET (VOIDmode, res,
41387 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
41388 UNSPEC_TRUNC_NOOP)));
41390 break;
41391 case XFmode:
41392 emit_insn (gen_frndintxf2_floor (res, tmp1));
41393 break;
41394 case HImode:
41395 emit_insn (gen_lfloorxfhi2 (res, tmp1));
41396 break;
41397 case SImode:
41398 emit_insn (gen_lfloorxfsi2 (res, tmp1));
41399 break;
41400 case DImode:
41401 emit_insn (gen_lfloorxfdi2 (res, tmp1));
41402 break;
41403 default:
41404 gcc_unreachable ();
41407 /* flags = signbit(a) */
41408 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
41410 /* if (flags) then res = -res */
41411 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
41412 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
41413 gen_rtx_LABEL_REF (VOIDmode, jump_label),
41414 pc_rtx);
41415 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
41416 predict_jump (REG_BR_PROB_BASE * 50 / 100);
41417 JUMP_LABEL (insn) = jump_label;
41419 emit_insn (gen_neg (res, res));
41421 emit_label (jump_label);
41422 LABEL_NUSES (jump_label) = 1;
41424 emit_move_insn (op0, res);
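/* Scalar model of the sequence above (illustrative only; assumes <math.h>):
   round-half-away-from-zero built from fabs, +0.5, floor, and a conditional
   negate driven by the sign bit that the fxam / test #0x02 pair extracts in
   the RTL.

     static double
     round_model (double a)
     {
       double r = floor (fabs (a) + 0.5);   // e2 = fabs (a) + 0.5, then floor
       return signbit (a) ? -r : r;         // negate when the sign bit is set
     }
 */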
41427 /* Output code to perform a Newton-Raphson approximation of a single precision
41428 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
41430 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
41432 rtx x0, x1, e0, e1;
41434 x0 = gen_reg_rtx (mode);
41435 e0 = gen_reg_rtx (mode);
41436 e1 = gen_reg_rtx (mode);
41437 x1 = gen_reg_rtx (mode);
41439 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
41441 b = force_reg (mode, b);
41443 /* x0 = rcp(b) estimate */
41444 if (mode == V16SFmode || mode == V8DFmode)
41445 emit_insn (gen_rtx_SET (VOIDmode, x0,
41446 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
41447 UNSPEC_RCP14)));
41448 else
41449 emit_insn (gen_rtx_SET (VOIDmode, x0,
41450 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
41451 UNSPEC_RCP)));
41453 /* e0 = x0 * b */
41454 emit_insn (gen_rtx_SET (VOIDmode, e0,
41455 gen_rtx_MULT (mode, x0, b)));
41457 /* e0 = x0 * e0 */
41458 emit_insn (gen_rtx_SET (VOIDmode, e0,
41459 gen_rtx_MULT (mode, x0, e0)));
41461 /* e1 = x0 + x0 */
41462 emit_insn (gen_rtx_SET (VOIDmode, e1,
41463 gen_rtx_PLUS (mode, x0, x0)));
41465 /* x1 = e1 - e0 */
41466 emit_insn (gen_rtx_SET (VOIDmode, x1,
41467 gen_rtx_MINUS (mode, e1, e0)));
41469 /* res = a * x1 */
41470 emit_insn (gen_rtx_SET (VOIDmode, res,
41471 gen_rtx_MULT (mode, a, x1)));
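/* The same refinement in scalar form (illustrative only; approx_rcp () is a
   hypothetical stand-in for the rcpss / rcp14 estimate): one Newton-Raphson
   step x1 = 2*x0 - b*x0*x0 roughly doubles the number of correct bits of the
   reciprocal before the final multiply by a.

     static float
     swdiv_sketch (float a, float b)
     {
       float x0 = approx_rcp (b);       // ~12-bit reciprocal estimate
       float e0 = x0 * b;               // e0 = x0 * b
       e0 = x0 * e0;                    // e0 = b * x0 * x0
       float x1 = (x0 + x0) - e0;       // x1 = 2*x0 - b*x0*x0
       return a * x1;                   // res = a * x1
     }
 */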
41474 /* Output code to perform a Newton-Raphson approximation of a
41475 single precision floating point [reciprocal] square root. */
41477 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
41478 bool recip)
41480 rtx x0, e0, e1, e2, e3, mthree, mhalf;
41481 REAL_VALUE_TYPE r;
41482 int unspec;
41484 x0 = gen_reg_rtx (mode);
41485 e0 = gen_reg_rtx (mode);
41486 e1 = gen_reg_rtx (mode);
41487 e2 = gen_reg_rtx (mode);
41488 e3 = gen_reg_rtx (mode);
41490 real_from_integer (&r, VOIDmode, -3, SIGNED);
41491 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
41493 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
41494 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
41495 unspec = UNSPEC_RSQRT;
41497 if (VECTOR_MODE_P (mode))
41499 mthree = ix86_build_const_vector (mode, true, mthree);
41500 mhalf = ix86_build_const_vector (mode, true, mhalf);
41501 /* There is no 512-bit rsqrt. There is however rsqrt14. */
41502 if (GET_MODE_SIZE (mode) == 64)
41503 unspec = UNSPEC_RSQRT14;
41506 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
41507 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
41509 a = force_reg (mode, a);
41511 /* x0 = rsqrt(a) estimate */
41512 emit_insn (gen_rtx_SET (VOIDmode, x0,
41513 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
41514 unspec)));
41516 /* If a == 0.0, filter out the infinite rsqrt estimate to prevent NaN for sqrt (0.0). */
41517 if (!recip)
41519 rtx zero, mask;
41521 zero = gen_reg_rtx (mode);
41522 mask = gen_reg_rtx (mode);
41524 zero = force_reg (mode, CONST0_RTX(mode));
41526 /* Handle masked compare. */
41527 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
41529 mask = gen_reg_rtx (HImode);
41530 /* Imm value 0x4 corresponds to not-equal comparison. */
41531 emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4)));
41532 emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask));
41534 else
41536 emit_insn (gen_rtx_SET (VOIDmode, mask,
41537 gen_rtx_NE (mode, zero, a)));
41539 emit_insn (gen_rtx_SET (VOIDmode, x0,
41540 gen_rtx_AND (mode, x0, mask)));
41544 /* e0 = x0 * a */
41545 emit_insn (gen_rtx_SET (VOIDmode, e0,
41546 gen_rtx_MULT (mode, x0, a)));
41547 /* e1 = e0 * x0 */
41548 emit_insn (gen_rtx_SET (VOIDmode, e1,
41549 gen_rtx_MULT (mode, e0, x0)));
41551 /* e2 = e1 - 3. */
41552 mthree = force_reg (mode, mthree);
41553 emit_insn (gen_rtx_SET (VOIDmode, e2,
41554 gen_rtx_PLUS (mode, e1, mthree)));
41556 mhalf = force_reg (mode, mhalf);
41557 if (recip)
41558 /* e3 = -.5 * x0 */
41559 emit_insn (gen_rtx_SET (VOIDmode, e3,
41560 gen_rtx_MULT (mode, x0, mhalf)));
41561 else
41562 /* e3 = -.5 * e0 */
41563 emit_insn (gen_rtx_SET (VOIDmode, e3,
41564 gen_rtx_MULT (mode, e0, mhalf)));
41565 /* ret = e2 * e3 */
41566 emit_insn (gen_rtx_SET (VOIDmode, res,
41567 gen_rtx_MULT (mode, e2, e3)));
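/* The same computation in scalar form (illustrative only; approx_rsqrt () is
   a hypothetical stand-in for the rsqrtss / rsqrt14 estimate, and
   <stdbool.h> is assumed):

     static float
     swsqrt_sketch (float a, bool recip)
     {
       float x0 = approx_rsqrt (a);
       if (!recip && a == 0.0f)
         x0 = 0.0f;                      // avoid inf * 0 -> NaN for sqrt (0.0)
       float e0 = x0 * a;                // e0 = a * x0
       float e1 = e0 * x0;               // e1 = a * x0 * x0
       float e2 = e1 - 3.0f;             // e2 = a*x0*x0 - 3
       float e3 = (recip ? x0 : e0) * -0.5f;
       return e2 * e3;                   // -0.5 * (x0 or a*x0) * (a*x0*x0 - 3)
     }
 */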
41570 #ifdef TARGET_SOLARIS
41571 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
41573 static void
41574 i386_solaris_elf_named_section (const char *name, unsigned int flags,
41575 tree decl)
41577 /* With Binutils 2.15, the "@unwind" marker must be specified on
41578 every occurrence of the ".eh_frame" section, not just the first
41579 one. */
41580 if (TARGET_64BIT
41581 && strcmp (name, ".eh_frame") == 0)
41583 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
41584 flags & SECTION_WRITE ? "aw" : "a");
41585 return;
41588 #ifndef USE_GAS
41589 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
41591 solaris_elf_asm_comdat_section (name, flags, decl);
41592 return;
41594 #endif
41596 default_elf_asm_named_section (name, flags, decl);
41598 #endif /* TARGET_SOLARIS */
41600 /* Return the mangling of TYPE if it is an extended fundamental type. */
41602 static const char *
41603 ix86_mangle_type (const_tree type)
41605 type = TYPE_MAIN_VARIANT (type);
41607 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
41608 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
41609 return NULL;
41611 switch (TYPE_MODE (type))
41613 case TFmode:
41614 /* __float128 is "g". */
41615 return "g";
41616 case XFmode:
41617 /* "long double" or __float80 is "e". */
41618 return "e";
41619 default:
41620 return NULL;
41624 /* For 32-bit code we can save the PIC register setup by using the
41625 hidden __stack_chk_fail_local function instead of calling
41626 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
41627 register, so it is better to call __stack_chk_fail directly. */
41629 static tree ATTRIBUTE_UNUSED
41630 ix86_stack_protect_fail (void)
41632 return TARGET_64BIT
41633 ? default_external_stack_protect_fail ()
41634 : default_hidden_stack_protect_fail ();
41637 /* Select a format to encode pointers in exception handling data. CODE
41638 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
41639 true if the symbol may be affected by dynamic relocations.
41641 ??? All x86 object file formats are capable of representing this.
41642 After all, the relocation needed is the same as for the call insn.
41643 Whether or not a particular assembler allows us to enter such, I
41644 guess we'll have to see. */
41645 int
41646 asm_preferred_eh_data_format (int code, int global)
41648 if (flag_pic)
41650 int type = DW_EH_PE_sdata8;
41651 if (!TARGET_64BIT
41652 || ix86_cmodel == CM_SMALL_PIC
41653 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
41654 type = DW_EH_PE_sdata4;
41655 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
41657 if (ix86_cmodel == CM_SMALL
41658 || (ix86_cmodel == CM_MEDIUM && code))
41659 return DW_EH_PE_udata4;
41660 return DW_EH_PE_absptr;
41663 /* Expand copysign from SIGN to the positive value ABS_VALUE
41664 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
41665 the sign-bit. */
41666 static void
41667 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
41669 enum machine_mode mode = GET_MODE (sign);
41670 rtx sgn = gen_reg_rtx (mode);
41671 if (mask == NULL_RTX)
41673 enum machine_mode vmode;
41675 if (mode == SFmode)
41676 vmode = V4SFmode;
41677 else if (mode == DFmode)
41678 vmode = V2DFmode;
41679 else
41680 vmode = mode;
41682 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
41683 if (!VECTOR_MODE_P (mode))
41685 /* We need to generate a scalar mode mask in this case. */
41686 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
41687 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
41688 mask = gen_reg_rtx (mode);
41689 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
41692 else
41693 mask = gen_rtx_NOT (mode, mask);
41694 emit_insn (gen_rtx_SET (VOIDmode, sgn,
41695 gen_rtx_AND (mode, mask, sign)));
41696 emit_insn (gen_rtx_SET (VOIDmode, result,
41697 gen_rtx_IOR (mode, abs_value, sgn)));
41700 /* Expand fabs (OP0) and return a new rtx that holds the result. The
41701 mask for masking out the sign-bit is stored in *SMASK, if that is
41702 non-null. */
41703 static rtx
41704 ix86_expand_sse_fabs (rtx op0, rtx *smask)
41706 enum machine_mode vmode, mode = GET_MODE (op0);
41707 rtx xa, mask;
41709 xa = gen_reg_rtx (mode);
41710 if (mode == SFmode)
41711 vmode = V4SFmode;
41712 else if (mode == DFmode)
41713 vmode = V2DFmode;
41714 else
41715 vmode = mode;
41716 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
41717 if (!VECTOR_MODE_P (mode))
41719 /* We need to generate a scalar mode mask in this case. */
41720 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
41721 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
41722 mask = gen_reg_rtx (mode);
41723 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
41725 emit_insn (gen_rtx_SET (VOIDmode, xa,
41726 gen_rtx_AND (mode, op0, mask)));
41728 if (smask)
41729 *smask = mask;
41731 return xa;
41734 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
41735 swapping the operands if SWAP_OPERANDS is true. The expanded
41736 code is a forward jump to a newly created label in case the
41737 comparison is true. The generated label rtx is returned. */
41738 static rtx
41739 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
41740 bool swap_operands)
41742 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
41743 rtx label, tmp;
41745 if (swap_operands)
41747 tmp = op0;
41748 op0 = op1;
41749 op1 = tmp;
41752 label = gen_label_rtx ();
41753 tmp = gen_rtx_REG (fpcmp_mode, FLAGS_REG);
41754 emit_insn (gen_rtx_SET (VOIDmode, tmp,
41755 gen_rtx_COMPARE (fpcmp_mode, op0, op1)));
41756 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
41757 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
41758 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
41759 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
41760 JUMP_LABEL (tmp) = label;
41762 return label;
41765 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
41766 using comparison code CODE. Operands are swapped for the comparison if
41767 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
41768 static rtx
41769 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
41770 bool swap_operands)
41772 rtx (*insn)(rtx, rtx, rtx, rtx);
41773 enum machine_mode mode = GET_MODE (op0);
41774 rtx mask = gen_reg_rtx (mode);
41776 if (swap_operands)
41778 rtx tmp = op0;
41779 op0 = op1;
41780 op1 = tmp;
41783 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
41785 emit_insn (insn (mask, op0, op1,
41786 gen_rtx_fmt_ee (code, mode, op0, op1)));
41787 return mask;
41790 /* Generate and return a rtx of mode MODE for 2**n where n is the number
41791 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
41792 static rtx
41793 ix86_gen_TWO52 (enum machine_mode mode)
41795 REAL_VALUE_TYPE TWO52r;
41796 rtx TWO52;
41798 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
41799 TWO52 = const_double_from_real_value (TWO52r, mode);
41800 TWO52 = force_reg (mode, TWO52);
41802 return TWO52;
41805 /* Expand SSE sequence for computing lround from OP1 storing
41806 into OP0. */
41807 void
41808 ix86_expand_lround (rtx op0, rtx op1)
41810 /* C code for the stuff we're doing below:
41811 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
41812 return (long)tmp;
41814 enum machine_mode mode = GET_MODE (op1);
41815 const struct real_format *fmt;
41816 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
41817 rtx adj;
41819 /* load nextafter (0.5, 0.0) */
41820 fmt = REAL_MODE_FORMAT (mode);
41821 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
41822 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
41824 /* adj = copysign (0.5, op1) */
41825 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
41826 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
41828 /* adj = op1 + adj */
41829 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
41831 /* op0 = (imode)adj */
41832 expand_fix (op0, adj, 0);
41835 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1
41836 storing into OPERAND0. */
41837 void
41838 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
41840 /* C code for the stuff we're doing below (for do_floor):
41841 xi = (long)op1;
41842 xi -= (double)xi > op1 ? 1 : 0;
41843 return xi;
41845 enum machine_mode fmode = GET_MODE (op1);
41846 enum machine_mode imode = GET_MODE (op0);
41847 rtx ireg, freg, label, tmp;
41849 /* reg = (long)op1 */
41850 ireg = gen_reg_rtx (imode);
41851 expand_fix (ireg, op1, 0);
41853 /* freg = (double)reg */
41854 freg = gen_reg_rtx (fmode);
41855 expand_float (freg, ireg, 0);
41857 /* ireg = (freg > op1) ? ireg - 1 : ireg */
41858 label = ix86_expand_sse_compare_and_jump (UNLE,
41859 freg, op1, !do_floor);
41860 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
41861 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
41862 emit_move_insn (ireg, tmp);
41864 emit_label (label);
41865 LABEL_NUSES (label) = 1;
41867 emit_move_insn (op0, ireg);
41870 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
41871 result in OPERAND0. */
41872 void
41873 ix86_expand_rint (rtx operand0, rtx operand1)
41875 /* C code for the stuff we're doing below:
41876 xa = fabs (operand1);
41877 if (!isless (xa, 2**52))
41878 return operand1;
41879 xa = xa + 2**52 - 2**52;
41880 return copysign (xa, operand1);
41882 enum machine_mode mode = GET_MODE (operand0);
41883 rtx res, xa, label, TWO52, mask;
41885 res = gen_reg_rtx (mode);
41886 emit_move_insn (res, operand1);
41888 /* xa = abs (operand1) */
41889 xa = ix86_expand_sse_fabs (res, &mask);
41891 /* if (!isless (xa, TWO52)) goto label; */
41892 TWO52 = ix86_gen_TWO52 (mode);
41893 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
41895 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
41896 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
41898 ix86_sse_copysign_to_positive (res, xa, res, mask);
41900 emit_label (label);
41901 LABEL_NUSES (label) = 1;
41903 emit_move_insn (operand0, res);
41906 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
41907 into OPERAND0. */
41908 void
41909 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
41911 /* C code for the stuff we expand below.
41912 double xa = fabs (x), x2;
41913 if (!isless (xa, TWO52))
41914 return x;
41915 xa = xa + TWO52 - TWO52;
41916 x2 = copysign (xa, x);
41917 Compensate. Floor:
41918 if (x2 > x)
41919 x2 -= 1;
41920 Compensate. Ceil:
41921 if (x2 < x)
41922 x2 -= -1;
41923 return x2;
41925 enum machine_mode mode = GET_MODE (operand0);
41926 rtx xa, TWO52, tmp, label, one, res, mask;
41928 TWO52 = ix86_gen_TWO52 (mode);
41930 /* Temporary for holding the result, initialized to the input
41931 operand to ease control flow. */
41932 res = gen_reg_rtx (mode);
41933 emit_move_insn (res, operand1);
41935 /* xa = abs (operand1) */
41936 xa = ix86_expand_sse_fabs (res, &mask);
41938 /* if (!isless (xa, TWO52)) goto label; */
41939 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
41941 /* xa = xa + TWO52 - TWO52; */
41942 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
41943 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
41945 /* xa = copysign (xa, operand1) */
41946 ix86_sse_copysign_to_positive (xa, xa, res, mask);
41948 /* generate 1.0 or -1.0 */
41949 one = force_reg (mode,
41950 const_double_from_real_value (do_floor
41951 ? dconst1 : dconstm1, mode));
41953 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
41954 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
41955 emit_insn (gen_rtx_SET (VOIDmode, tmp,
41956 gen_rtx_AND (mode, one, tmp)));
41957 /* We always need to subtract here to preserve signed zero. */
41958 tmp = expand_simple_binop (mode, MINUS,
41959 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
41960 emit_move_insn (res, tmp);
41962 emit_label (label);
41963 LABEL_NUSES (label) = 1;
41965 emit_move_insn (operand0, res);
41968 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
41969 into OPERAND0. */
41970 void
41971 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
41973 /* C code for the stuff we expand below.
41974 double xa = fabs (x), x2;
41975 if (!isless (xa, TWO52))
41976 return x;
41977 x2 = (double)(long)x;
41978 Compensate. Floor:
41979 if (x2 > x)
41980 x2 -= 1;
41981 Compensate. Ceil:
41982 if (x2 < x)
41983 x2 += 1;
41984 if (HONOR_SIGNED_ZEROS (mode))
41985 return copysign (x2, x);
41986 return x2;
41988 enum machine_mode mode = GET_MODE (operand0);
41989 rtx xa, xi, TWO52, tmp, label, one, res, mask;
41991 TWO52 = ix86_gen_TWO52 (mode);
41993 /* Temporary for holding the result, initialized to the input
41994 operand to ease control flow. */
41995 res = gen_reg_rtx (mode);
41996 emit_move_insn (res, operand1);
41998 /* xa = abs (operand1) */
41999 xa = ix86_expand_sse_fabs (res, &mask);
42001 /* if (!isless (xa, TWO52)) goto label; */
42002 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42004 /* xa = (double)(long)x */
42005 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
42006 expand_fix (xi, res, 0);
42007 expand_float (xa, xi, 0);
42009 /* generate 1.0 */
42010 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
42012 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
42013 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
42014 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42015 gen_rtx_AND (mode, one, tmp)));
42016 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
42017 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42018 emit_move_insn (res, tmp);
42020 if (HONOR_SIGNED_ZEROS (mode))
42021 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
42023 emit_label (label);
42024 LABEL_NUSES (label) = 1;
42026 emit_move_insn (operand0, res);
42029 /* Expand SSE sequence for computing round from OPERAND1 storing
42030 into OPERAND0. Sequence that works without relying on DImode truncation
42031 via cvttsd2siq that is only available on 64bit targets. */
42032 void
42033 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
42035 /* C code for the stuff we expand below.
42036 double xa = fabs (x), xa2, x2;
42037 if (!isless (xa, TWO52))
42038 return x;
42039 Using the absolute value and copying back sign makes
42040 -0.0 -> -0.0 correct.
42041 xa2 = xa + TWO52 - TWO52;
42042 Compensate.
42043 dxa = xa2 - xa;
42044 if (dxa <= -0.5)
42045 xa2 += 1;
42046 else if (dxa > 0.5)
42047 xa2 -= 1;
42048 x2 = copysign (xa2, x);
42049 return x2;
42051 enum machine_mode mode = GET_MODE (operand0);
42052 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
42054 TWO52 = ix86_gen_TWO52 (mode);
42056 /* Temporary for holding the result, initialized to the input
42057 operand to ease control flow. */
42058 res = gen_reg_rtx (mode);
42059 emit_move_insn (res, operand1);
42061 /* xa = abs (operand1) */
42062 xa = ix86_expand_sse_fabs (res, &mask);
42064 /* if (!isless (xa, TWO52)) goto label; */
42065 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42067 /* xa2 = xa + TWO52 - TWO52; */
42068 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
42069 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
42071 /* dxa = xa2 - xa; */
42072 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
42074 /* generate 0.5, 1.0 and -0.5 */
42075 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
42076 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
42077 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
42078 0, OPTAB_DIRECT);
42080 /* Compensate. */
42081 tmp = gen_reg_rtx (mode);
42082 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
42083 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
42084 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42085 gen_rtx_AND (mode, one, tmp)));
42086 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42087 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
42088 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
42089 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42090 gen_rtx_AND (mode, one, tmp)));
42091 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42093 /* res = copysign (xa2, operand1) */
42094 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
42096 emit_label (label);
42097 LABEL_NUSES (label) = 1;
42099 emit_move_insn (operand0, res);
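/* Illustrative walk-through, assuming DFmode and the default
   round-to-nearest-even rounding mode: for operand1 = 2.5,
     xa  = 2.5;  xa2 = 2.5 + 2**52 - 2**52 = 2.0   (2**52 + 2.5 rounds to
                                                     the even neighbour)
     dxa = 2.0 - 2.5 = -0.5
     dxa > 0.5 is false (nothing subtracted), dxa <= -0.5 is true, so
     xa2 += 1.0 giving 3.0;  res = copysign (3.0, 2.5) = 3.0.
   The compensation is what turns the to-nearest-even result 2.0 into the
   round()-style half-away-from-zero result 3.0.  */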
42102 /* Expand SSE sequence for computing trunc from OPERAND1 storing
42103 into OPERAND0. */
42104 void
42105 ix86_expand_trunc (rtx operand0, rtx operand1)
42107 /* C code for SSE variant we expand below.
42108 double xa = fabs (x), x2;
42109 if (!isless (xa, TWO52))
42110 return x;
42111 x2 = (double)(long)x;
42112 if (HONOR_SIGNED_ZEROS (mode))
42113 return copysign (x2, x);
42114 return x2;
42116 enum machine_mode mode = GET_MODE (operand0);
42117 rtx xa, xi, TWO52, label, res, mask;
42119 TWO52 = ix86_gen_TWO52 (mode);
42121 /* Temporary for holding the result, initialized to the input
42122 operand to ease control flow. */
42123 res = gen_reg_rtx (mode);
42124 emit_move_insn (res, operand1);
42126 /* xa = abs (operand1) */
42127 xa = ix86_expand_sse_fabs (res, &mask);
42129 /* if (!isless (xa, TWO52)) goto label; */
42130 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42132 /* x = (double)(long)x */
42133 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
42134 expand_fix (xi, res, 0);
42135 expand_float (res, xi, 0);
42137 if (HONOR_SIGNED_ZEROS (mode))
42138 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
42140 emit_label (label);
42141 LABEL_NUSES (label) = 1;
42143 emit_move_insn (operand0, res);
42146 /* Expand SSE sequence for computing trunc from OPERAND1 storing
42147 into OPERAND0, without relying on the 64-bit-only cvttsd2siq truncation. */
42148 void
42149 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
42151 enum machine_mode mode = GET_MODE (operand0);
42152 rtx xa, mask, TWO52, label, one, res, smask, tmp;
42154 /* C code for SSE variant we expand below.
42155 double xa = fabs (x), x2;
42156 if (!isless (xa, TWO52))
42157 return x;
42158 xa2 = xa + TWO52 - TWO52;
42159 Compensate:
42160 if (xa2 > xa)
42161 xa2 -= 1.0;
42162 x2 = copysign (xa2, x);
42163 return x2;
42166 TWO52 = ix86_gen_TWO52 (mode);
42168 /* Temporary for holding the result, initialized to the input
42169 operand to ease control flow. */
42170 res = gen_reg_rtx (mode);
42171 emit_move_insn (res, operand1);
42173 /* xa = abs (operand1) */
42174 xa = ix86_expand_sse_fabs (res, &smask);
42176 /* if (!isless (xa, TWO52)) goto label; */
42177 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42179 /* res = xa + TWO52 - TWO52; */
42180 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
42181 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
42182 emit_move_insn (res, tmp);
42184 /* generate 1.0 */
42185 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
42187 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
42188 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
42189 emit_insn (gen_rtx_SET (VOIDmode, mask,
42190 gen_rtx_AND (mode, mask, one)));
42191 tmp = expand_simple_binop (mode, MINUS,
42192 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
42193 emit_move_insn (res, tmp);
42195 /* res = copysign (res, operand1) */
42196 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
42198 emit_label (label);
42199 LABEL_NUSES (label) = 1;
42201 emit_move_insn (operand0, res);
42204 /* Expand SSE sequence for computing round from OPERAND1 storing
42205 into OPERAND0. */
42206 void
42207 ix86_expand_round (rtx operand0, rtx operand1)
42209 /* C code for the stuff we're doing below:
42210 double xa = fabs (x);
42211 if (!isless (xa, TWO52))
42212 return x;
42213 xa = (double)(long)(xa + nextafter (0.5, 0.0));
42214 return copysign (xa, x);
42216 enum machine_mode mode = GET_MODE (operand0);
42217 rtx res, TWO52, xa, label, xi, half, mask;
42218 const struct real_format *fmt;
42219 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
42221 /* Temporary for holding the result, initialized to the input
42222 operand to ease control flow. */
42223 res = gen_reg_rtx (mode);
42224 emit_move_insn (res, operand1);
42226 TWO52 = ix86_gen_TWO52 (mode);
42227 xa = ix86_expand_sse_fabs (res, &mask);
42228 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42230 /* load nextafter (0.5, 0.0) */
42231 fmt = REAL_MODE_FORMAT (mode);
42232 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
42233 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
42235 /* xa = xa + 0.5 */
42236 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
42237 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
42239 /* xa = (double)(int64_t)xa */
42240 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
42241 expand_fix (xi, xa, 0);
42242 expand_float (xa, xi, 0);
42244 /* res = copysign (xa, operand1) */
42245 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
42247 emit_label (label);
42248 LABEL_NUSES (label) = 1;
42250 emit_move_insn (operand0, res);
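/* Why pred(0.5) = nextafter (0.5, 0.0) is added instead of 0.5
   (illustrative, assuming round-to-nearest-even): for
   xa = 0.49999999999999994, the largest double below 0.5, xa + 0.5 rounds
   up to exactly 1.0, so truncation would give 1 although round (xa) is 0.
   xa + pred(0.5) instead stays just below 1.0 and truncates to 0, while an
   exact halfway case such as 2.5 + pred(0.5) still rounds to 3.0 and
   truncates to the expected 3.  */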
42253 /* Expand SSE sequence for computing round
42254 from OP1 storing into OP0 using sse4 round insn. */
42255 void
42256 ix86_expand_round_sse4 (rtx op0, rtx op1)
42258 enum machine_mode mode = GET_MODE (op0);
42259 rtx e1, e2, res, half;
42260 const struct real_format *fmt;
42261 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
42262 rtx (*gen_copysign) (rtx, rtx, rtx);
42263 rtx (*gen_round) (rtx, rtx, rtx);
42265 switch (mode)
42267 case SFmode:
42268 gen_copysign = gen_copysignsf3;
42269 gen_round = gen_sse4_1_roundsf2;
42270 break;
42271 case DFmode:
42272 gen_copysign = gen_copysigndf3;
42273 gen_round = gen_sse4_1_rounddf2;
42274 break;
42275 default:
42276 gcc_unreachable ();
42279 /* round (a) = trunc (a + copysign (0.5, a)) */
42281 /* load nextafter (0.5, 0.0) */
42282 fmt = REAL_MODE_FORMAT (mode);
42283 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
42284 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
42285 half = const_double_from_real_value (pred_half, mode);
42287 /* e1 = copysign (0.5, op1) */
42288 e1 = gen_reg_rtx (mode);
42289 emit_insn (gen_copysign (e1, half, op1));
42291 /* e2 = op1 + e1 */
42292 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
42294 /* res = trunc (e2) */
42295 res = gen_reg_rtx (mode);
42296 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
42298 emit_move_insn (op0, res);
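/* Illustrative walk-through, assuming DFmode: for op1 = -2.5,
     e1  = copysign (pred(0.5), -2.5) = -pred(0.5)
     e2  = -2.5 + e1, which rounds to -3.0 under round-to-nearest-even
     res = trunc (-3.0) = -3.0 = round (-2.5).
   pred(0.5) is used rather than 0.5 for the same reason as in
   ix86_expand_round above.  */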
42302 /* Table of valid machine attributes. */
42303 static const struct attribute_spec ix86_attribute_table[] =
42305 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
42306 affects_type_identity } */
42307 /* Stdcall attribute says callee is responsible for popping arguments
42308 if they are not variable. */
42309 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42310 true },
42311 /* Fastcall attribute says callee is responsible for popping arguments
42312 if they are not variable. */
42313 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42314 true },
42315 /* Thiscall attribute says callee is responsible for popping arguments
42316 if they are not variable. */
42317 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42318 true },
42319 /* Cdecl attribute says the callee is a normal C declaration */
42320 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42321 true },
42322 /* Regparm attribute specifies how many integer arguments are to be
42323 passed in registers. */
42324 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
42325 true },
42326 /* Sseregparm attribute says we are using x86_64 calling conventions
42327 for FP arguments. */
42328 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42329 true },
42330 /* The transactional memory builtins are implicitly regparm or fastcall
42331 depending on the ABI. Override the generic do-nothing attribute that
42332 these builtins were declared with. */
42333 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
42334 true },
42335 /* force_align_arg_pointer says this function realigns the stack at entry. */
42336 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
42337 false, true, true, ix86_handle_cconv_attribute, false },
42338 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
42339 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
42340 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
42341 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
42342 false },
42343 #endif
42344 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
42345 false },
42346 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
42347 false },
42348 #ifdef SUBTARGET_ATTRIBUTE_TABLE
42349 SUBTARGET_ATTRIBUTE_TABLE,
42350 #endif
42351 /* ms_abi and sysv_abi calling convention function attributes. */
42352 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
42353 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
42354 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
42355 false },
42356 { "callee_pop_aggregate_return", 1, 1, false, true, true,
42357 ix86_handle_callee_pop_aggregate_return, true },
42358 /* End element. */
42359 { NULL, 0, 0, false, false, false, NULL, false }
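/* Illustrative use of a few of these attributes in user code (hypothetical
   declarations, not taken from this file):

     int  __attribute__((fastcall))    f (int a, int b);   a, b in ecx/edx
     int  __attribute__((regparm (3))) g (int, int, int);  args in eax/edx/ecx
     void __attribute__((ms_abi))      h (void);           MS x86-64 ABI

   The handlers named above validate the attribute arguments and diagnose
   conflicting combinations, e.g. fastcall together with regparm.  */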
42362 /* Implement targetm.vectorize.builtin_vectorization_cost. */
42363 static int
42364 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
42365 tree vectype,
42366 int misalign ATTRIBUTE_UNUSED)
42368 unsigned elements;
42370 switch (type_of_cost)
42372 case scalar_stmt:
42373 return ix86_cost->scalar_stmt_cost;
42375 case scalar_load:
42376 return ix86_cost->scalar_load_cost;
42378 case scalar_store:
42379 return ix86_cost->scalar_store_cost;
42381 case vector_stmt:
42382 return ix86_cost->vec_stmt_cost;
42384 case vector_load:
42385 return ix86_cost->vec_align_load_cost;
42387 case vector_store:
42388 return ix86_cost->vec_store_cost;
42390 case vec_to_scalar:
42391 return ix86_cost->vec_to_scalar_cost;
42393 case scalar_to_vec:
42394 return ix86_cost->scalar_to_vec_cost;
42396 case unaligned_load:
42397 case unaligned_store:
42398 return ix86_cost->vec_unalign_load_cost;
42400 case cond_branch_taken:
42401 return ix86_cost->cond_taken_branch_cost;
42403 case cond_branch_not_taken:
42404 return ix86_cost->cond_not_taken_branch_cost;
42406 case vec_perm:
42407 case vec_promote_demote:
42408 return ix86_cost->vec_stmt_cost;
42410 case vec_construct:
42411 elements = TYPE_VECTOR_SUBPARTS (vectype);
42412 return elements / 2 + 1;
42414 default:
42415 gcc_unreachable ();
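/* Illustrative example of the vec_construct estimate above: building a
   V4SF vector element by element gives TYPE_VECTOR_SUBPARTS == 4, so the
   returned cost is 4 / 2 + 1 = 3, a rough model of the shuffles needed to
   pack the scalars which, unlike the other cases, does not consult the
   per-processor cost tables.  */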
42419 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
42420 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
42421 insn every time. */
42423 static GTY(()) rtx vselect_insn;
42425 /* Initialize vselect_insn. */
42427 static void
42428 init_vselect_insn (void)
42430 unsigned i;
42431 rtx x;
42433 x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
42434 for (i = 0; i < MAX_VECT_LEN; ++i)
42435 XVECEXP (x, 0, i) = const0_rtx;
42436 x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
42437 const0_rtx), x);
42438 x = gen_rtx_SET (VOIDmode, const0_rtx, x);
42439 start_sequence ();
42440 vselect_insn = emit_insn (x);
42441 end_sequence ();
42444 /* Construct (set target (vec_select op0 (parallel perm))) and
42445 return true if that's a valid instruction in the active ISA. */
42447 static bool
42448 expand_vselect (rtx target, rtx op0, const unsigned char *perm,
42449 unsigned nelt, bool testing_p)
42451 unsigned int i;
42452 rtx x, save_vconcat;
42453 int icode;
42455 if (vselect_insn == NULL_RTX)
42456 init_vselect_insn ();
42458 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
42459 PUT_NUM_ELEM (XVEC (x, 0), nelt);
42460 for (i = 0; i < nelt; ++i)
42461 XVECEXP (x, 0, i) = GEN_INT (perm[i]);
42462 save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
42463 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
42464 PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
42465 SET_DEST (PATTERN (vselect_insn)) = target;
42466 icode = recog_memoized (vselect_insn);
42468 if (icode >= 0 && !testing_p)
42469 emit_insn (copy_rtx (PATTERN (vselect_insn)));
42471 SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
42472 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
42473 INSN_CODE (vselect_insn) = -1;
42475 return icode >= 0;
42478 /* Similar, but generate a vec_concat from op0 and op1 as well. */
42480 static bool
42481 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
42482 const unsigned char *perm, unsigned nelt,
42483 bool testing_p)
42485 enum machine_mode v2mode;
42486 rtx x;
42487 bool ok;
42489 if (vselect_insn == NULL_RTX)
42490 init_vselect_insn ();
42492 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
42493 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
42494 PUT_MODE (x, v2mode);
42495 XEXP (x, 0) = op0;
42496 XEXP (x, 1) = op1;
42497 ok = expand_vselect (target, x, perm, nelt, testing_p);
42498 XEXP (x, 0) = const0_rtx;
42499 XEXP (x, 1) = const0_rtx;
42500 return ok;
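/* Shape of the RTL test-recognized above, e.g. for a V4SF interleave-low
   of operands A and B with perm = { 0, 4, 1, 5 } (illustrative):

     (set (reg:V4SF target)
          (vec_select:V4SF
            (vec_concat:V8SF (reg:V4SF A) (reg:V4SF B))
            (parallel [(const_int 0) (const_int 4)
                       (const_int 1) (const_int 5)])))

   which recog_memoized accepts when sse.md has a pattern for it, here the
   one emitted as unpcklps.  */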
42503 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
42504 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
42506 static bool
42507 expand_vec_perm_blend (struct expand_vec_perm_d *d)
42509 enum machine_mode vmode = d->vmode;
42510 unsigned i, mask, nelt = d->nelt;
42511 rtx target, op0, op1, x;
42512 rtx rperm[32], vperm;
42514 if (d->one_operand_p)
42515 return false;
42516 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
42518 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
42520 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
42522 else
42523 return false;
42525 /* This is a blend, not a permute. Elements must stay in their
42526 respective lanes. */
42527 for (i = 0; i < nelt; ++i)
42529 unsigned e = d->perm[i];
42530 if (!(e == i || e == i + nelt))
42531 return false;
42534 if (d->testing_p)
42535 return true;
42537 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
42538 decision should be extracted elsewhere, so that we only try that
42539 sequence once all budget==3 options have been tried. */
42540 target = d->target;
42541 op0 = d->op0;
42542 op1 = d->op1;
42543 mask = 0;
42545 switch (vmode)
42547 case V4DFmode:
42548 case V8SFmode:
42549 case V2DFmode:
42550 case V4SFmode:
42551 case V8HImode:
42552 case V8SImode:
42553 for (i = 0; i < nelt; ++i)
42554 mask |= (d->perm[i] >= nelt) << i;
42555 break;
42557 case V2DImode:
42558 for (i = 0; i < 2; ++i)
42559 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
42560 vmode = V8HImode;
42561 goto do_subreg;
42563 case V4SImode:
42564 for (i = 0; i < 4; ++i)
42565 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
42566 vmode = V8HImode;
42567 goto do_subreg;
42569 case V16QImode:
42570 /* See if bytes move in pairs so we can use pblendw with
42571 an immediate argument, rather than pblendvb with a vector
42572 argument. */
42573 for (i = 0; i < 16; i += 2)
42574 if (d->perm[i] + 1 != d->perm[i + 1])
42576 use_pblendvb:
42577 for (i = 0; i < nelt; ++i)
42578 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
42580 finish_pblendvb:
42581 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
42582 vperm = force_reg (vmode, vperm);
42584 if (GET_MODE_SIZE (vmode) == 16)
42585 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
42586 else
42587 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
42588 if (target != d->target)
42589 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42590 return true;
42593 for (i = 0; i < 8; ++i)
42594 mask |= (d->perm[i * 2] >= 16) << i;
42595 vmode = V8HImode;
42596 /* FALLTHRU */
42598 do_subreg:
42599 target = gen_reg_rtx (vmode);
42600 op0 = gen_lowpart (vmode, op0);
42601 op1 = gen_lowpart (vmode, op1);
42602 break;
42604 case V32QImode:
42605 /* See if bytes move in pairs. If not, vpblendvb must be used. */
42606 for (i = 0; i < 32; i += 2)
42607 if (d->perm[i] + 1 != d->perm[i + 1])
42608 goto use_pblendvb;
42609 /* See if bytes move in quadruplets. If yes, vpblendd
42610 with immediate can be used. */
42611 for (i = 0; i < 32; i += 4)
42612 if (d->perm[i] + 2 != d->perm[i + 2])
42613 break;
42614 if (i < 32)
42616 /* See if bytes move the same in both lanes. If yes,
42617 vpblendw with immediate can be used. */
42618 for (i = 0; i < 16; i += 2)
42619 if (d->perm[i] + 16 != d->perm[i + 16])
42620 goto use_pblendvb;
42622 /* Use vpblendw. */
42623 for (i = 0; i < 16; ++i)
42624 mask |= (d->perm[i * 2] >= 32) << i;
42625 vmode = V16HImode;
42626 goto do_subreg;
42629 /* Use vpblendd. */
42630 for (i = 0; i < 8; ++i)
42631 mask |= (d->perm[i * 4] >= 32) << i;
42632 vmode = V8SImode;
42633 goto do_subreg;
42635 case V16HImode:
42636 /* See if words move in pairs. If yes, vpblendd can be used. */
42637 for (i = 0; i < 16; i += 2)
42638 if (d->perm[i] + 1 != d->perm[i + 1])
42639 break;
42640 if (i < 16)
42642 /* See if words move the same in both lanes. If not,
42643 vpblendvb must be used. */
42644 for (i = 0; i < 8; i++)
42645 if (d->perm[i] + 8 != d->perm[i + 8])
42647 /* Use vpblendvb. */
42648 for (i = 0; i < 32; ++i)
42649 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
42651 vmode = V32QImode;
42652 nelt = 32;
42653 target = gen_reg_rtx (vmode);
42654 op0 = gen_lowpart (vmode, op0);
42655 op1 = gen_lowpart (vmode, op1);
42656 goto finish_pblendvb;
42659 /* Use vpblendw. */
42660 for (i = 0; i < 16; ++i)
42661 mask |= (d->perm[i] >= 16) << i;
42662 break;
42665 /* Use vpblendd. */
42666 for (i = 0; i < 8; ++i)
42667 mask |= (d->perm[i * 2] >= 16) << i;
42668 vmode = V8SImode;
42669 goto do_subreg;
42671 case V4DImode:
42672 /* Use vpblendd. */
42673 for (i = 0; i < 4; ++i)
42674 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
42675 vmode = V8SImode;
42676 goto do_subreg;
42678 default:
42679 gcc_unreachable ();
42682 /* This matches five different patterns with the different modes. */
42683 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
42684 x = gen_rtx_SET (VOIDmode, target, x);
42685 emit_insn (x);
42686 if (target != d->target)
42687 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42689 return true;
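/* Illustrative mask computation for the V8HImode case above: with
   perm = { 0, 9, 2, 11, 4, 13, 6, 15 }, bit i of MASK is set exactly when
   element i comes from op1, giving mask = 0xaa, i.e. the immediate of a
   pblendw.  For V4SImode the permutation is first re-expressed in V8HImode,
   which is why each selected dword contributes two mask bits (the
   "? 3 : 0" above).  */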
42692 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
42693 in terms of the variable form of vpermilps.
42695 Note that we will have already failed the immediate input vpermilps,
42696 which requires that the high and low part shuffle be identical; the
42697 variable form doesn't require that. */
42699 static bool
42700 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
42702 rtx rperm[8], vperm;
42703 unsigned i;
42705 if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
42706 return false;
42708 /* We can only permute within the 128-bit lane. */
42709 for (i = 0; i < 8; ++i)
42711 unsigned e = d->perm[i];
42712 if (i < 4 ? e >= 4 : e < 4)
42713 return false;
42716 if (d->testing_p)
42717 return true;
42719 for (i = 0; i < 8; ++i)
42721 unsigned e = d->perm[i];
42723 /* Within each 128-bit lane, the elements of op0 are numbered
42724 from 0 and the elements of op1 are numbered from 4. */
42725 if (e >= 8 + 4)
42726 e -= 8;
42727 else if (e >= 4)
42728 e -= 4;
42730 rperm[i] = GEN_INT (e);
42733 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
42734 vperm = force_reg (V8SImode, vperm);
42735 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
42737 return true;
42740 /* Return true if permutation D can be performed as VMODE permutation
42741 instead. */
42743 static bool
42744 valid_perm_using_mode_p (enum machine_mode vmode, struct expand_vec_perm_d *d)
42746 unsigned int i, j, chunk;
42748 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
42749 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
42750 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
42751 return false;
42753 if (GET_MODE_NUNITS (vmode) >= d->nelt)
42754 return true;
42756 chunk = d->nelt / GET_MODE_NUNITS (vmode);
42757 for (i = 0; i < d->nelt; i += chunk)
42758 if (d->perm[i] & (chunk - 1))
42759 return false;
42760 else
42761 for (j = 1; j < chunk; ++j)
42762 if (d->perm[i] + j != d->perm[i + j])
42763 return false;
42765 return true;
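/* Illustrative example: a V16QImode permutation such as
   { 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11 } moves whole aligned groups
   of chunk = 4 consecutive bytes, so it is valid as the V4SImode
   permutation { 1, 0, 3, 2 }; a permutation whose groups are misaligned or
   non-consecutive fails the checks above.  */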
42768 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
42769 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
42771 static bool
42772 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
42774 unsigned i, nelt, eltsz, mask;
42775 unsigned char perm[32];
42776 enum machine_mode vmode = V16QImode;
42777 rtx rperm[32], vperm, target, op0, op1;
42779 nelt = d->nelt;
42781 if (!d->one_operand_p)
42783 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
42785 if (TARGET_AVX2
42786 && valid_perm_using_mode_p (V2TImode, d))
42788 if (d->testing_p)
42789 return true;
42791 /* Use vperm2i128 insn. The pattern uses
42792 V4DImode instead of V2TImode. */
42793 target = d->target;
42794 if (d->vmode != V4DImode)
42795 target = gen_reg_rtx (V4DImode);
42796 op0 = gen_lowpart (V4DImode, d->op0);
42797 op1 = gen_lowpart (V4DImode, d->op1);
42798 rperm[0]
42799 = GEN_INT (((d->perm[0] & (nelt / 2)) ? 1 : 0)
42800 || ((d->perm[nelt / 2] & (nelt / 2)) ? 2 : 0));
42801 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
42802 if (target != d->target)
42803 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42804 return true;
42806 return false;
42809 else
42811 if (GET_MODE_SIZE (d->vmode) == 16)
42813 if (!TARGET_SSSE3)
42814 return false;
42816 else if (GET_MODE_SIZE (d->vmode) == 32)
42818 if (!TARGET_AVX2)
42819 return false;
42821 /* V4DImode should already have been handled through
42822 expand_vselect by the vpermq instruction. */
42823 gcc_assert (d->vmode != V4DImode);
42825 vmode = V32QImode;
42826 if (d->vmode == V8SImode
42827 || d->vmode == V16HImode
42828 || d->vmode == V32QImode)
42830 /* First see if vpermq can be used for
42831 V8SImode/V16HImode/V32QImode. */
42832 if (valid_perm_using_mode_p (V4DImode, d))
42834 for (i = 0; i < 4; i++)
42835 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
42836 if (d->testing_p)
42837 return true;
42838 target = gen_reg_rtx (V4DImode);
42839 if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
42840 perm, 4, false))
42842 emit_move_insn (d->target,
42843 gen_lowpart (d->vmode, target));
42844 return true;
42846 return false;
42849 /* Next see if vpermd can be used. */
42850 if (valid_perm_using_mode_p (V8SImode, d))
42851 vmode = V8SImode;
42853 /* Or if vpermps can be used. */
42854 else if (d->vmode == V8SFmode)
42855 vmode = V8SImode;
42857 if (vmode == V32QImode)
42859 /* vpshufb only works intra-lane; it is not
42860 possible to shuffle bytes between the lanes. */
42861 for (i = 0; i < nelt; ++i)
42862 if ((d->perm[i] ^ i) & (nelt / 2))
42863 return false;
42866 else
42867 return false;
42870 if (d->testing_p)
42871 return true;
42873 if (vmode == V8SImode)
42874 for (i = 0; i < 8; ++i)
42875 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
42876 else
42878 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
42879 if (!d->one_operand_p)
42880 mask = 2 * nelt - 1;
42881 else if (vmode == V16QImode)
42882 mask = nelt - 1;
42883 else
42884 mask = nelt / 2 - 1;
42886 for (i = 0; i < nelt; ++i)
42888 unsigned j, e = d->perm[i] & mask;
42889 for (j = 0; j < eltsz; ++j)
42890 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
42894 vperm = gen_rtx_CONST_VECTOR (vmode,
42895 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
42896 vperm = force_reg (vmode, vperm);
42898 target = d->target;
42899 if (d->vmode != vmode)
42900 target = gen_reg_rtx (vmode);
42901 op0 = gen_lowpart (vmode, d->op0);
42902 if (d->one_operand_p)
42904 if (vmode == V16QImode)
42905 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
42906 else if (vmode == V32QImode)
42907 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
42908 else if (vmode == V8SFmode)
42909 emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
42910 else
42911 emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
42913 else
42915 op1 = gen_lowpart (vmode, d->op1);
42916 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
42918 if (target != d->target)
42919 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42921 return true;
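/* Illustrative control-vector construction for the one-operand V8HImode
   case above: nelt = 8, eltsz = 2 and mask = nelt - 1 = 7.  With
   perm = { 3, 2, 1, 0, 7, 6, 5, 4 } each element index e expands to the
   byte pair { e*2, e*2 + 1 }, giving the V16QImode pshufb control
   { 6,7, 4,5, 2,3, 0,1, 14,15, 12,13, 10,11, 8,9 }, i.e. a single pshufb
   that reverses the order of the words within each 64-bit half.  */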
42924 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
42925 in a single instruction. */
42927 static bool
42928 expand_vec_perm_1 (struct expand_vec_perm_d *d)
42930 unsigned i, nelt = d->nelt;
42931 unsigned char perm2[MAX_VECT_LEN];
42933 /* Check plain VEC_SELECT first, because AVX has instructions that could
42934 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
42935 input where SEL+CONCAT may not. */
42936 if (d->one_operand_p)
42938 int mask = nelt - 1;
42939 bool identity_perm = true;
42940 bool broadcast_perm = true;
42942 for (i = 0; i < nelt; i++)
42944 perm2[i] = d->perm[i] & mask;
42945 if (perm2[i] != i)
42946 identity_perm = false;
42947 if (perm2[i])
42948 broadcast_perm = false;
42951 if (identity_perm)
42953 if (!d->testing_p)
42954 emit_move_insn (d->target, d->op0);
42955 return true;
42957 else if (broadcast_perm && TARGET_AVX2)
42959 /* Use vpbroadcast{b,w,d}. */
42960 rtx (*gen) (rtx, rtx) = NULL;
42961 switch (d->vmode)
42963 case V32QImode:
42964 gen = gen_avx2_pbroadcastv32qi_1;
42965 break;
42966 case V16HImode:
42967 gen = gen_avx2_pbroadcastv16hi_1;
42968 break;
42969 case V8SImode:
42970 gen = gen_avx2_pbroadcastv8si_1;
42971 break;
42972 case V16QImode:
42973 gen = gen_avx2_pbroadcastv16qi;
42974 break;
42975 case V8HImode:
42976 gen = gen_avx2_pbroadcastv8hi;
42977 break;
42978 case V8SFmode:
42979 gen = gen_avx2_vec_dupv8sf_1;
42980 break;
42981 /* For other modes prefer other shuffles this function creates. */
42982 default: break;
42984 if (gen != NULL)
42986 if (!d->testing_p)
42987 emit_insn (gen (d->target, d->op0));
42988 return true;
42992 if (expand_vselect (d->target, d->op0, perm2, nelt, d->testing_p))
42993 return true;
42995 /* There are plenty of patterns in sse.md that are written for
42996 SEL+CONCAT and are not replicated for a single op. Perhaps
42997 that should be changed, to avoid the nastiness here. */
42999 /* Recognize interleave style patterns, which means incrementing
43000 every other permutation operand. */
43001 for (i = 0; i < nelt; i += 2)
43003 perm2[i] = d->perm[i] & mask;
43004 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
43006 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
43007 d->testing_p))
43008 return true;
43010 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
43011 if (nelt >= 4)
43013 for (i = 0; i < nelt; i += 4)
43015 perm2[i + 0] = d->perm[i + 0] & mask;
43016 perm2[i + 1] = d->perm[i + 1] & mask;
43017 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
43018 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
43021 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
43022 d->testing_p))
43023 return true;
43027 /* Finally, try the fully general two operand permute. */
43028 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
43029 d->testing_p))
43030 return true;
43032 /* Recognize interleave style patterns with reversed operands. */
43033 if (!d->one_operand_p)
43035 for (i = 0; i < nelt; ++i)
43037 unsigned e = d->perm[i];
43038 if (e >= nelt)
43039 e -= nelt;
43040 else
43041 e += nelt;
43042 perm2[i] = e;
43045 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt,
43046 d->testing_p))
43047 return true;
43050 /* Try the SSE4.1 blend variable merge instructions. */
43051 if (expand_vec_perm_blend (d))
43052 return true;
43054 /* Try one of the AVX vpermil variable permutations. */
43055 if (expand_vec_perm_vpermil (d))
43056 return true;
43058 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
43059 vpshufb, vpermd, vpermps or vpermq variable permutation. */
43060 if (expand_vec_perm_pshufb (d))
43061 return true;
43063 /* Try the AVX512F vpermi2 instructions. */
43064 rtx vec[64];
43065 enum machine_mode mode = d->vmode;
43066 if (mode == V8DFmode)
43067 mode = V8DImode;
43068 else if (mode == V16SFmode)
43069 mode = V16SImode;
43070 for (i = 0; i < nelt; ++i)
43071 vec[i] = GEN_INT (d->perm[i]);
43072 rtx mask = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt, vec));
43073 if (ix86_expand_vec_perm_vpermi2 (d->target, d->op0, mask, d->op1))
43074 return true;
43076 return false;
43079 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
43080 in terms of a pair of pshuflw + pshufhw instructions. */
43082 static bool
43083 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
43085 unsigned char perm2[MAX_VECT_LEN];
43086 unsigned i;
43087 bool ok;
43089 if (d->vmode != V8HImode || !d->one_operand_p)
43090 return false;
43092 /* The two permutations only operate in 64-bit lanes. */
43093 for (i = 0; i < 4; ++i)
43094 if (d->perm[i] >= 4)
43095 return false;
43096 for (i = 4; i < 8; ++i)
43097 if (d->perm[i] < 4)
43098 return false;
43100 if (d->testing_p)
43101 return true;
43103 /* Emit the pshuflw. */
43104 memcpy (perm2, d->perm, 4);
43105 for (i = 4; i < 8; ++i)
43106 perm2[i] = i;
43107 ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
43108 gcc_assert (ok);
43110 /* Emit the pshufhw. */
43111 memcpy (perm2 + 4, d->perm + 4, 4);
43112 for (i = 0; i < 4; ++i)
43113 perm2[i] = i;
43114 ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
43115 gcc_assert (ok);
43117 return true;
43120 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43121 the permutation using the SSSE3 palignr instruction. This succeeds
43122 when all of the elements in PERM fit within one vector and we merely
43123 need to shift them down so that a single vector permutation has a
43124 chance to succeed. */
43126 static bool
43127 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
43129 unsigned i, nelt = d->nelt;
43130 unsigned min, max;
43131 bool in_order, ok;
43132 rtx shift, target;
43133 struct expand_vec_perm_d dcopy;
43135 /* Even with AVX, palignr only operates on 128-bit vectors. */
43136 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
43137 return false;
43139 min = nelt, max = 0;
43140 for (i = 0; i < nelt; ++i)
43142 unsigned e = d->perm[i];
43143 if (e < min)
43144 min = e;
43145 if (e > max)
43146 max = e;
43148 if (min == 0 || max - min >= nelt)
43149 return false;
43151 /* Given that we have SSSE3, we know we'll be able to implement the
43152 single operand permutation after the palignr with pshufb. */
43153 if (d->testing_p)
43154 return true;
43156 dcopy = *d;
43157 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
43158 target = gen_reg_rtx (TImode);
43159 emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, d->op1),
43160 gen_lowpart (TImode, d->op0), shift));
43162 dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
43163 dcopy.one_operand_p = true;
43165 in_order = true;
43166 for (i = 0; i < nelt; ++i)
43168 unsigned e = dcopy.perm[i] - min;
43169 if (e != i)
43170 in_order = false;
43171 dcopy.perm[i] = e;
43174 /* Test for the degenerate case where the alignment by itself
43175 produces the desired permutation. */
43176 if (in_order)
43178 emit_move_insn (d->target, dcopy.op0);
43179 return true;
43182 ok = expand_vec_perm_1 (&dcopy);
43183 gcc_assert (ok);
43185 return ok;
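/* Illustrative palignr use for the code above: for V16QImode with
   perm = { 5, 6, ..., 20 } we get min = 5, max = 20 and max - min < nelt,
   so palignr on the op1:op0 concatenation with shift = 5 * 8 bits leaves
   original element j + 5 in byte j of the result.  The residual
   permutation dcopy.perm[i] = perm[i] - 5 = i is then the identity, the
   "degenerate case" handled above; otherwise it is finished by
   expand_vec_perm_1 (in practice pshufb, as the comment notes).  */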
43188 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
43190 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43191 a two vector permutation into a single vector permutation by using
43192 an interleave operation to merge the vectors. */
43194 static bool
43195 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
43197 struct expand_vec_perm_d dremap, dfinal;
43198 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
43199 unsigned HOST_WIDE_INT contents;
43200 unsigned char remap[2 * MAX_VECT_LEN];
43201 rtx seq;
43202 bool ok, same_halves = false;
43204 if (GET_MODE_SIZE (d->vmode) == 16)
43206 if (d->one_operand_p)
43207 return false;
43209 else if (GET_MODE_SIZE (d->vmode) == 32)
43211 if (!TARGET_AVX)
43212 return false;
43213 /* For 32-byte modes allow even d->one_operand_p.
43214 The lack of cross-lane shuffling in some instructions
43215 might prevent a single insn shuffle. */
43216 dfinal = *d;
43217 dfinal.testing_p = true;
43218 /* If expand_vec_perm_interleave3 can expand this into
43219 a 3-insn sequence, give up and let it be expanded that
43220 way. While that is one insn longer, it doesn't need a
43221 memory operand, and in the common case where the
43222 interleave-low and interleave-high permutations with the
43223 same operands are adjacent, it needs only 4 insns for
43224 both after CSE. */
43225 if (expand_vec_perm_interleave3 (&dfinal))
43226 return false;
43228 else
43229 return false;
43231 /* Examine from whence the elements come. */
43232 contents = 0;
43233 for (i = 0; i < nelt; ++i)
43234 contents |= ((unsigned HOST_WIDE_INT) 1) << d->perm[i];
43236 memset (remap, 0xff, sizeof (remap));
43237 dremap = *d;
43239 if (GET_MODE_SIZE (d->vmode) == 16)
43241 unsigned HOST_WIDE_INT h1, h2, h3, h4;
43243 /* Split the two input vectors into 4 halves. */
43244 h1 = (((unsigned HOST_WIDE_INT) 1) << nelt2) - 1;
43245 h2 = h1 << nelt2;
43246 h3 = h2 << nelt2;
43247 h4 = h3 << nelt2;
43249 /* If the elements all come from the low halves, use interleave low, and
43250 similarly for interleave high. If the elements are from mis-matched
43251 halves, we can use shufps for V4SF/V4SI or do a DImode shuffle. */
43252 if ((contents & (h1 | h3)) == contents)
43254 /* punpckl* */
43255 for (i = 0; i < nelt2; ++i)
43257 remap[i] = i * 2;
43258 remap[i + nelt] = i * 2 + 1;
43259 dremap.perm[i * 2] = i;
43260 dremap.perm[i * 2 + 1] = i + nelt;
43262 if (!TARGET_SSE2 && d->vmode == V4SImode)
43263 dremap.vmode = V4SFmode;
43265 else if ((contents & (h2 | h4)) == contents)
43267 /* punpckh* */
43268 for (i = 0; i < nelt2; ++i)
43270 remap[i + nelt2] = i * 2;
43271 remap[i + nelt + nelt2] = i * 2 + 1;
43272 dremap.perm[i * 2] = i + nelt2;
43273 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
43275 if (!TARGET_SSE2 && d->vmode == V4SImode)
43276 dremap.vmode = V4SFmode;
43278 else if ((contents & (h1 | h4)) == contents)
43280 /* shufps */
43281 for (i = 0; i < nelt2; ++i)
43283 remap[i] = i;
43284 remap[i + nelt + nelt2] = i + nelt2;
43285 dremap.perm[i] = i;
43286 dremap.perm[i + nelt2] = i + nelt + nelt2;
43288 if (nelt != 4)
43290 /* shufpd */
43291 dremap.vmode = V2DImode;
43292 dremap.nelt = 2;
43293 dremap.perm[0] = 0;
43294 dremap.perm[1] = 3;
43297 else if ((contents & (h2 | h3)) == contents)
43299 /* shufps */
43300 for (i = 0; i < nelt2; ++i)
43302 remap[i + nelt2] = i;
43303 remap[i + nelt] = i + nelt2;
43304 dremap.perm[i] = i + nelt2;
43305 dremap.perm[i + nelt2] = i + nelt;
43307 if (nelt != 4)
43309 /* shufpd */
43310 dremap.vmode = V2DImode;
43311 dremap.nelt = 2;
43312 dremap.perm[0] = 1;
43313 dremap.perm[1] = 2;
43316 else
43317 return false;
43319 else
43321 unsigned int nelt4 = nelt / 4, nzcnt = 0;
43322 unsigned HOST_WIDE_INT q[8];
43323 unsigned int nonzero_halves[4];
43325 /* Split the two input vectors into 8 quarters. */
43326 q[0] = (((unsigned HOST_WIDE_INT) 1) << nelt4) - 1;
43327 for (i = 1; i < 8; ++i)
43328 q[i] = q[0] << (nelt4 * i);
43329 for (i = 0; i < 4; ++i)
43330 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
43332 nonzero_halves[nzcnt] = i;
43333 ++nzcnt;
43336 if (nzcnt == 1)
43338 gcc_assert (d->one_operand_p);
43339 nonzero_halves[1] = nonzero_halves[0];
43340 same_halves = true;
43342 else if (d->one_operand_p)
43344 gcc_assert (nonzero_halves[0] == 0);
43345 gcc_assert (nonzero_halves[1] == 1);
43348 if (nzcnt <= 2)
43350 if (d->perm[0] / nelt2 == nonzero_halves[1])
43352 /* Attempt to increase the likelihood that dfinal
43353 shuffle will be intra-lane. */
43354 char tmph = nonzero_halves[0];
43355 nonzero_halves[0] = nonzero_halves[1];
43356 nonzero_halves[1] = tmph;
43359 /* vperm2f128 or vperm2i128. */
43360 for (i = 0; i < nelt2; ++i)
43362 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
43363 remap[i + nonzero_halves[0] * nelt2] = i;
43364 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
43365 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
43368 if (d->vmode != V8SFmode
43369 && d->vmode != V4DFmode
43370 && d->vmode != V8SImode)
43372 dremap.vmode = V8SImode;
43373 dremap.nelt = 8;
43374 for (i = 0; i < 4; ++i)
43376 dremap.perm[i] = i + nonzero_halves[0] * 4;
43377 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
43381 else if (d->one_operand_p)
43382 return false;
43383 else if (TARGET_AVX2
43384 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
43386 /* vpunpckl* */
43387 for (i = 0; i < nelt4; ++i)
43389 remap[i] = i * 2;
43390 remap[i + nelt] = i * 2 + 1;
43391 remap[i + nelt2] = i * 2 + nelt2;
43392 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
43393 dremap.perm[i * 2] = i;
43394 dremap.perm[i * 2 + 1] = i + nelt;
43395 dremap.perm[i * 2 + nelt2] = i + nelt2;
43396 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
43399 else if (TARGET_AVX2
43400 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
43402 /* vpunpckh* */
43403 for (i = 0; i < nelt4; ++i)
43405 remap[i + nelt4] = i * 2;
43406 remap[i + nelt + nelt4] = i * 2 + 1;
43407 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
43408 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
43409 dremap.perm[i * 2] = i + nelt4;
43410 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
43411 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
43412 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
43415 else
43416 return false;
43419 /* Use the remapping array set up above to move the elements from their
43420 swizzled locations into their final destinations. */
43421 dfinal = *d;
43422 for (i = 0; i < nelt; ++i)
43424 unsigned e = remap[d->perm[i]];
43425 gcc_assert (e < nelt);
43426 /* If same_halves is true, both halves of the remapped vector are the
43427 same. Avoid cross-lane accesses if possible. */
43428 if (same_halves && i >= nelt2)
43430 gcc_assert (e < nelt2);
43431 dfinal.perm[i] = e + nelt2;
43433 else
43434 dfinal.perm[i] = e;
43436 if (!d->testing_p)
43438 dremap.target = gen_reg_rtx (dremap.vmode);
43439 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
43441 dfinal.op1 = dfinal.op0;
43442 dfinal.one_operand_p = true;
43444 /* Test if the final remap can be done with a single insn. For V4SFmode or
43445 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
43446 start_sequence ();
43447 ok = expand_vec_perm_1 (&dfinal);
43448 seq = get_insns ();
43449 end_sequence ();
43451 if (!ok)
43452 return false;
43454 if (d->testing_p)
43455 return true;
43457 if (dremap.vmode != dfinal.vmode)
43459 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
43460 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
43463 ok = expand_vec_perm_1 (&dremap);
43464 gcc_assert (ok);
43466 emit_insn (seq);
43467 return true;
43470 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43471 a single vector cross-lane permutation into vpermq followed
43472 by any of the single insn permutations. */
43474 static bool
43475 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
43477 struct expand_vec_perm_d dremap, dfinal;
43478 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
43479 unsigned contents[2];
43480 bool ok;
43482 if (!(TARGET_AVX2
43483 && (d->vmode == V32QImode || d->vmode == V16HImode)
43484 && d->one_operand_p))
43485 return false;
43487 contents[0] = 0;
43488 contents[1] = 0;
43489 for (i = 0; i < nelt2; ++i)
43491 contents[0] |= 1u << (d->perm[i] / nelt4);
43492 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
43495 for (i = 0; i < 2; ++i)
43497 unsigned int cnt = 0;
43498 for (j = 0; j < 4; ++j)
43499 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
43500 return false;
43503 if (d->testing_p)
43504 return true;
43506 dremap = *d;
43507 dremap.vmode = V4DImode;
43508 dremap.nelt = 4;
43509 dremap.target = gen_reg_rtx (V4DImode);
43510 dremap.op0 = gen_lowpart (V4DImode, d->op0);
43511 dremap.op1 = dremap.op0;
43512 dremap.one_operand_p = true;
43513 for (i = 0; i < 2; ++i)
43515 unsigned int cnt = 0;
43516 for (j = 0; j < 4; ++j)
43517 if ((contents[i] & (1u << j)) != 0)
43518 dremap.perm[2 * i + cnt++] = j;
43519 for (; cnt < 2; ++cnt)
43520 dremap.perm[2 * i + cnt] = 0;
43523 dfinal = *d;
43524 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
43525 dfinal.op1 = dfinal.op0;
43526 dfinal.one_operand_p = true;
43527 for (i = 0, j = 0; i < nelt; ++i)
43529 if (i == nelt2)
43530 j = 2;
43531 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
43532 if ((d->perm[i] / nelt4) == dremap.perm[j])
43534 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
43535 dfinal.perm[i] |= nelt4;
43536 else
43537 gcc_unreachable ();
43540 ok = expand_vec_perm_1 (&dremap);
43541 gcc_assert (ok);
43543 ok = expand_vec_perm_1 (&dfinal);
43544 gcc_assert (ok);
43546 return true;
43549 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to expand
43550 a vector permutation using two instructions, vperm2f128 resp.
43551 vperm2i128 followed by any single in-lane permutation. */
43553 static bool
43554 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
43556 struct expand_vec_perm_d dfirst, dsecond;
43557 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
43558 bool ok;
43560 if (!TARGET_AVX
43561 || GET_MODE_SIZE (d->vmode) != 32
43562 || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
43563 return false;
43565 dsecond = *d;
43566 dsecond.one_operand_p = false;
43567 dsecond.testing_p = true;
43569 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
43570 immediate. For perm < 16 the second permutation uses
43571 d->op0 as first operand, for perm >= 16 it uses d->op1
43572 as first operand. The second operand is the result of
43573 vperm2[fi]128. */
43574 for (perm = 0; perm < 32; perm++)
43576 /* Ignore permutations which do not move anything cross-lane. */
43577 if (perm < 16)
43579 /* The second shuffle for e.g. V4DFmode has
43580 0123 and ABCD operands.
43581 Ignore AB23, as 23 is already in the second lane
43582 of the first operand. */
43583 if ((perm & 0xc) == (1 << 2)) continue;
43584 /* And 01CD, as 01 is in the first lane of the first
43585 operand. */
43586 if ((perm & 3) == 0) continue;
43587 /* And 4567, as then the vperm2[fi]128 doesn't change
43588 anything on the original 4567 second operand. */
43589 if ((perm & 0xf) == ((3 << 2) | 2)) continue;
43591 else
43593 /* The second shuffle for e.g. V4DFmode has
43594 4567 and ABCD operands.
43595 Ignore AB67, as 67 is already in the second lane
43596 of the first operand. */
43597 if ((perm & 0xc) == (3 << 2)) continue;
43598 /* And 45CD, as 45 is in the first lane of the first
43599 operand. */
43600 if ((perm & 3) == 2) continue;
43601 /* And 0123, as then the vperm2[fi]128 doesn't change
43602 anything on the original 0123 first operand. */
43603 if ((perm & 0xf) == (1 << 2)) continue;
43606 for (i = 0; i < nelt; i++)
43608 j = d->perm[i] / nelt2;
43609 if (j == ((perm >> (2 * (i >= nelt2))) & 3))
43610 dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
43611 else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
43612 dsecond.perm[i] = d->perm[i] & (nelt - 1);
43613 else
43614 break;
43617 if (i == nelt)
43619 start_sequence ();
43620 ok = expand_vec_perm_1 (&dsecond);
43621 end_sequence ();
43623 else
43624 ok = false;
43626 if (ok)
43628 if (d->testing_p)
43629 return true;
43631 /* Found a usable second shuffle. dfirst will be
43632 vperm2f128 on d->op0 and d->op1. */
43633 dsecond.testing_p = false;
43634 dfirst = *d;
43635 dfirst.target = gen_reg_rtx (d->vmode);
43636 for (i = 0; i < nelt; i++)
43637 dfirst.perm[i] = (i & (nelt2 - 1))
43638 + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
43640 ok = expand_vec_perm_1 (&dfirst);
43641 gcc_assert (ok);
43643 /* And dsecond is some single insn shuffle, taking
43644 d->op0 and result of vperm2f128 (if perm < 16) or
43645 d->op1 and result of vperm2f128 (otherwise). */
43646 dsecond.op1 = dfirst.target;
43647 if (perm >= 16)
43648 dsecond.op0 = dfirst.op1;
43650 ok = expand_vec_perm_1 (&dsecond);
43651 gcc_assert (ok);
43653 return true;
43656 /* For one operand, the only useful vperm2f128 permutation is 0x10. */
43657 if (d->one_operand_p)
43658 return false;
43661 return false;
43664 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43665 a two vector permutation using 2 intra-lane interleave insns
43666 and cross-lane shuffle for 32-byte vectors. */
43668 static bool
43669 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
43671 unsigned i, nelt;
43672 rtx (*gen) (rtx, rtx, rtx);
43674 if (d->one_operand_p)
43675 return false;
43676 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
43678 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
43680 else
43681 return false;
43683 nelt = d->nelt;
43684 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
43685 return false;
43686 for (i = 0; i < nelt; i += 2)
43687 if (d->perm[i] != d->perm[0] + i / 2
43688 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
43689 return false;
43691 if (d->testing_p)
43692 return true;
43694 switch (d->vmode)
43696 case V32QImode:
43697 if (d->perm[0])
43698 gen = gen_vec_interleave_highv32qi;
43699 else
43700 gen = gen_vec_interleave_lowv32qi;
43701 break;
43702 case V16HImode:
43703 if (d->perm[0])
43704 gen = gen_vec_interleave_highv16hi;
43705 else
43706 gen = gen_vec_interleave_lowv16hi;
43707 break;
43708 case V8SImode:
43709 if (d->perm[0])
43710 gen = gen_vec_interleave_highv8si;
43711 else
43712 gen = gen_vec_interleave_lowv8si;
43713 break;
43714 case V4DImode:
43715 if (d->perm[0])
43716 gen = gen_vec_interleave_highv4di;
43717 else
43718 gen = gen_vec_interleave_lowv4di;
43719 break;
43720 case V8SFmode:
43721 if (d->perm[0])
43722 gen = gen_vec_interleave_highv8sf;
43723 else
43724 gen = gen_vec_interleave_lowv8sf;
43725 break;
43726 case V4DFmode:
43727 if (d->perm[0])
43728 gen = gen_vec_interleave_highv4df;
43729 else
43730 gen = gen_vec_interleave_lowv4df;
43731 break;
43732 default:
43733 gcc_unreachable ();
43736 emit_insn (gen (d->target, d->op0, d->op1));
43737 return true;
43740 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement
43741 a single vector permutation using a single intra-lane vector
43742 permutation, vperm2f128 swapping the lanes and vblend* insn blending
43743 the non-swapped and swapped vectors together. */
43745 static bool
43746 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
43748 struct expand_vec_perm_d dfirst, dsecond;
43749 unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
43750 rtx seq;
43751 bool ok;
43752 rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;
43754 if (!TARGET_AVX
43755 || TARGET_AVX2
43756 || (d->vmode != V8SFmode && d->vmode != V4DFmode)
43757 || !d->one_operand_p)
43758 return false;
43760 dfirst = *d;
43761 for (i = 0; i < nelt; i++)
43762 dfirst.perm[i] = 0xff;
43763 for (i = 0, msk = 0; i < nelt; i++)
43765 j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
43766 if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
43767 return false;
43768 dfirst.perm[j] = d->perm[i];
43769 if (j != i)
43770 msk |= (1 << i);
43772 for (i = 0; i < nelt; i++)
43773 if (dfirst.perm[i] == 0xff)
43774 dfirst.perm[i] = i;
43776 if (!d->testing_p)
43777 dfirst.target = gen_reg_rtx (dfirst.vmode);
43779 start_sequence ();
43780 ok = expand_vec_perm_1 (&dfirst);
43781 seq = get_insns ();
43782 end_sequence ();
43784 if (!ok)
43785 return false;
43787 if (d->testing_p)
43788 return true;
43790 emit_insn (seq);
43792 dsecond = *d;
43793 dsecond.op0 = dfirst.target;
43794 dsecond.op1 = dfirst.target;
43795 dsecond.one_operand_p = true;
43796 dsecond.target = gen_reg_rtx (dsecond.vmode);
43797 for (i = 0; i < nelt; i++)
43798 dsecond.perm[i] = i ^ nelt2;
43800 ok = expand_vec_perm_1 (&dsecond);
43801 gcc_assert (ok);
43803 blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
43804 emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
43805 return true;
43808 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement a V4DF
43809 permutation using two vperm2f128, followed by a vshufpd insn blending
43810 the two vectors together. */
43812 static bool
43813 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
43815 struct expand_vec_perm_d dfirst, dsecond, dthird;
43816 bool ok;
43818 if (!TARGET_AVX || (d->vmode != V4DFmode))
43819 return false;
43821 if (d->testing_p)
43822 return true;
43824 dfirst = *d;
43825 dsecond = *d;
43826 dthird = *d;
43828 dfirst.perm[0] = (d->perm[0] & ~1);
43829 dfirst.perm[1] = (d->perm[0] & ~1) + 1;
43830 dfirst.perm[2] = (d->perm[2] & ~1);
43831 dfirst.perm[3] = (d->perm[2] & ~1) + 1;
43832 dsecond.perm[0] = (d->perm[1] & ~1);
43833 dsecond.perm[1] = (d->perm[1] & ~1) + 1;
43834 dsecond.perm[2] = (d->perm[3] & ~1);
43835 dsecond.perm[3] = (d->perm[3] & ~1) + 1;
43836 dthird.perm[0] = (d->perm[0] % 2);
43837 dthird.perm[1] = (d->perm[1] % 2) + 4;
43838 dthird.perm[2] = (d->perm[2] % 2) + 2;
43839 dthird.perm[3] = (d->perm[3] % 2) + 6;
43841 dfirst.target = gen_reg_rtx (dfirst.vmode);
43842 dsecond.target = gen_reg_rtx (dsecond.vmode);
43843 dthird.op0 = dfirst.target;
43844 dthird.op1 = dsecond.target;
43845 dthird.one_operand_p = false;
43847 canonicalize_perm (&dfirst);
43848 canonicalize_perm (&dsecond);
43850 ok = expand_vec_perm_1 (&dfirst)
43851 && expand_vec_perm_1 (&dsecond)
43852 && expand_vec_perm_1 (&dthird);
43854 gcc_assert (ok);
43856 return true;
43859 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
43860 permutation with two pshufb insns and an ior. We should have already
43861 failed all two instruction sequences. */
43863 static bool
43864 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
43866 rtx rperm[2][16], vperm, l, h, op, m128;
43867 unsigned int i, nelt, eltsz;
43869 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
43870 return false;
43871 gcc_assert (!d->one_operand_p);
43873 if (d->testing_p)
43874 return true;
43876 nelt = d->nelt;
43877 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
43879 /* Generate two permutation masks. If the required element is within
43880 the given vector it is shuffled into the proper lane. If the required
43881 element is in the other vector, force a zero into the lane by setting
43882 bit 7 in the permutation mask. */
43883 m128 = GEN_INT (-128);
43884 for (i = 0; i < nelt; ++i)
43886 unsigned j, e = d->perm[i];
43887 unsigned which = (e >= nelt);
43888 if (e >= nelt)
43889 e -= nelt;
43891 for (j = 0; j < eltsz; ++j)
43893 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
43894 rperm[1-which][i*eltsz + j] = m128;
43898 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
43899 vperm = force_reg (V16QImode, vperm);
43901 l = gen_reg_rtx (V16QImode);
43902 op = gen_lowpart (V16QImode, d->op0);
43903 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
43905 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
43906 vperm = force_reg (V16QImode, vperm);
43908 h = gen_reg_rtx (V16QImode);
43909 op = gen_lowpart (V16QImode, d->op1);
43910 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
43912 op = d->target;
43913 if (d->vmode != V16QImode)
43914 op = gen_reg_rtx (V16QImode);
43915 emit_insn (gen_iorv16qi3 (op, l, h));
43916 if (op != d->target)
43917 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
43919 return true;
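/* Illustrative example of the two-mask expansion above, for V16QImode with
   perm = { 0, 17, 2, 19, ... } (even bytes from op0, odd bytes from op1):
   the first control vector is { 0, -128, 2, -128, ... }, so the pshufb of
   op0 zeroes every odd result byte, and the second is { -128, 1, -128, 3,
   ... }, so the pshufb of op1 zeroes every even result byte; the final
   ior merges the two half-populated vectors.  */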
43922 /* Implement arbitrary permutation of one V32QImode or V16HImode operand
43923 with two vpshufb insns, vpermq and vpor. We should have already failed
43924 all two or three instruction sequences. */
43926 static bool
43927 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
43929 rtx rperm[2][32], vperm, l, h, hp, op, m128;
43930 unsigned int i, nelt, eltsz;
43932 if (!TARGET_AVX2
43933 || !d->one_operand_p
43934 || (d->vmode != V32QImode && d->vmode != V16HImode))
43935 return false;
43937 if (d->testing_p)
43938 return true;
43940 nelt = d->nelt;
43941 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
43943 /* Generate two permutation masks. If the required element is within
43944 the same lane, it is shuffled in. If the required element is from the
43945 other lane, force a zero by setting bit 7 in the permutation mask.
43946 The other mask has non-negative elements where an element is
43947 requested from the other lane but is moved to the opposite lane,
43948 so that the result of vpshufb can have its two V2TImode halves
43949 swapped. */
43950 m128 = GEN_INT (-128);
43951 for (i = 0; i < nelt; ++i)
43953 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
43954 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
43956 for (j = 0; j < eltsz; ++j)
43958 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
43959 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
43963 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
43964 vperm = force_reg (V32QImode, vperm);
43966 h = gen_reg_rtx (V32QImode);
43967 op = gen_lowpart (V32QImode, d->op0);
43968 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
43970 /* Swap the 128-bit lanes of h into hp. */
43971 hp = gen_reg_rtx (V4DImode);
43972 op = gen_lowpart (V4DImode, h);
43973 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
43974 const1_rtx));
43976 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
43977 vperm = force_reg (V32QImode, vperm);
43979 l = gen_reg_rtx (V32QImode);
43980 op = gen_lowpart (V32QImode, d->op0);
43981 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
43983 op = d->target;
43984 if (d->vmode != V32QImode)
43985 op = gen_reg_rtx (V32QImode);
43986 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
43987 if (op != d->target)
43988 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
43990 return true;
43993 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
43994 and extract-odd permutations of two V32QImode or V16HImode operands
43995 with two vpshufb insns, vpor and vpermq. We should have already
43996 failed all two or three instruction sequences. */
43998 static bool
43999 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
44001 rtx rperm[2][32], vperm, l, h, ior, op, m128;
44002 unsigned int i, nelt, eltsz;
44004 if (!TARGET_AVX2
44005 || d->one_operand_p
44006 || (d->vmode != V32QImode && d->vmode != V16HImode))
44007 return false;
44009 for (i = 0; i < d->nelt; ++i)
44010 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
44011 return false;
44013 if (d->testing_p)
44014 return true;
44016 nelt = d->nelt;
44017 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
44019 /* Generate two permutation masks. In the first permutation mask
44020 the first quarter will contain indexes for the first half
44021 of the op0, the second quarter will contain bit 7 set, third quarter
44022 will contain indexes for the second half of the op0 and the
44023 last quarter bit 7 set. In the second permutation mask
44024 the first quarter will contain bit 7 set, the second quarter
44025 indexes for the first half of the op1, the third quarter bit 7 set
44026 and last quarter indexes for the second half of the op1.
44027 I.e. the first mask e.g. for V32QImode extract even will be:
44028 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
44029 (all values masked with 0xf except for -128) and second mask
44030 for extract even will be
44031 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
44032 m128 = GEN_INT (-128);
44033 for (i = 0; i < nelt; ++i)
44035 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
44036 unsigned which = d->perm[i] >= nelt;
44037 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
44039 for (j = 0; j < eltsz; ++j)
44041 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
44042 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
44046 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
44047 vperm = force_reg (V32QImode, vperm);
44049 l = gen_reg_rtx (V32QImode);
44050 op = gen_lowpart (V32QImode, d->op0);
44051 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
44053 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
44054 vperm = force_reg (V32QImode, vperm);
44056 h = gen_reg_rtx (V32QImode);
44057 op = gen_lowpart (V32QImode, d->op1);
44058 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
44060 ior = gen_reg_rtx (V32QImode);
44061 emit_insn (gen_iorv32qi3 (ior, l, h));
44063 /* Permute the V4DImode quarters using the { 0, 2, 1, 3 } permutation. */
44064 op = gen_reg_rtx (V4DImode);
44065 ior = gen_lowpart (V4DImode, ior);
44066 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
44067 const1_rtx, GEN_INT (3)));
44068 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
44070 return true;
44073 /* A subroutine of expand_vec_perm_even_odd and
44073 ix86_expand_vec_extract_even_odd. Implement extract-even
44074 and extract-odd permutations. */
44076 static bool
44077 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
44079 rtx t1, t2, t3, t4, t5;
44081 switch (d->vmode)
44083 case V4DFmode:
44084 if (d->testing_p)
44085 break;
44086 t1 = gen_reg_rtx (V4DFmode);
44087 t2 = gen_reg_rtx (V4DFmode);
44089 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
44090 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
44091 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
44093 /* Now an unpck[lh]pd will produce the result required. */
44094 if (odd)
44095 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
44096 else
44097 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
44098 emit_insn (t3);
44099 break;
44101 case V8SFmode:
44103 int mask = odd ? 0xdd : 0x88;
44105 if (d->testing_p)
44106 break;
44107 t1 = gen_reg_rtx (V8SFmode);
44108 t2 = gen_reg_rtx (V8SFmode);
44109 t3 = gen_reg_rtx (V8SFmode);
44111 /* Shuffle within the 128-bit lanes to produce:
44112 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
44113 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
44114 GEN_INT (mask)));
44116 /* Shuffle the lanes around to produce:
44117 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
44118 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
44119 GEN_INT (0x3)));
44121 /* Shuffle within the 128-bit lanes to produce:
44122 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
44123 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
44125 /* Shuffle within the 128-bit lanes to produce:
44126 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
44127 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
44129 /* Shuffle the lanes around to produce:
44130 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
44131 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
44132 GEN_INT (0x20)));
44134 break;
44136 case V2DFmode:
44137 case V4SFmode:
44138 case V2DImode:
44139 case V4SImode:
44140 /* These are always directly implementable by expand_vec_perm_1. */
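/* E.g. the two-operand V4SFmode extract-even { 0, 2, 4, 6 } is a
   single shufps with immediate 0x88. */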
44141 gcc_unreachable ();
44143 case V8HImode:
44144 if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
44145 return expand_vec_perm_pshufb2 (d);
44146 else
44148 if (d->testing_p)
44149 break;
44150 /* We need 2*log2(N)-1 operations to achieve odd/even
44151 with interleave. */
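/* For V8HImode (N == 8) that is 2*3 - 1 == 5 interleaves, matching
   the five vec_interleave emissions below. */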
44152 t1 = gen_reg_rtx (V8HImode);
44153 t2 = gen_reg_rtx (V8HImode);
44154 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
44155 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
44156 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
44157 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
44158 if (odd)
44159 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
44160 else
44161 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
44162 emit_insn (t3);
44164 break;
44166 case V16QImode:
44167 if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
44168 return expand_vec_perm_pshufb2 (d);
44169 else
44171 if (d->testing_p)
44172 break;
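/* As above, V16QImode needs 2*log2(16) - 1 == 7 interleave steps. */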
44173 t1 = gen_reg_rtx (V16QImode);
44174 t2 = gen_reg_rtx (V16QImode);
44175 t3 = gen_reg_rtx (V16QImode);
44176 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
44177 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
44178 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
44179 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
44180 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
44181 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
44182 if (odd)
44183 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
44184 else
44185 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
44186 emit_insn (t3);
44188 break;
44190 case V16HImode:
44191 case V32QImode:
44192 return expand_vec_perm_vpshufb2_vpermq_even_odd (d);
44194 case V4DImode:
44195 if (!TARGET_AVX2)
44197 struct expand_vec_perm_d d_copy = *d;
44198 d_copy.vmode = V4DFmode;
44199 if (d->testing_p)
44200 d_copy.target = gen_lowpart (V4DFmode, d->target);
44201 else
44202 d_copy.target = gen_reg_rtx (V4DFmode);
44203 d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
44204 d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
44205 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
44207 if (!d->testing_p)
44208 emit_move_insn (d->target,
44209 gen_lowpart (V4DImode, d_copy.target));
44210 return true;
44212 return false;
44215 if (d->testing_p)
44216 break;
44218 t1 = gen_reg_rtx (V4DImode);
44219 t2 = gen_reg_rtx (V4DImode);
44221 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
44222 emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
44223 emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
44225 /* Now a vpunpck[lh]qdq will produce the result required. */
44226 if (odd)
44227 t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
44228 else
44229 t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
44230 emit_insn (t3);
44231 break;
44233 case V8SImode:
44234 if (!TARGET_AVX2)
44236 struct expand_vec_perm_d d_copy = *d;
44237 d_copy.vmode = V8SFmode;
44238 if (d->testing_p)
44239 d_copy.target = gen_lowpart (V8SFmode, d->target);
44240 else
44241 d_copy.target = gen_reg_rtx (V8SFmode);
44242 d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
44243 d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
44244 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
44246 if (!d->testing_p)
44247 emit_move_insn (d->target,
44248 gen_lowpart (V8SImode, d_copy.target));
44249 return true;
44251 return false;
44254 if (d->testing_p)
44255 break;
44257 t1 = gen_reg_rtx (V8SImode);
44258 t2 = gen_reg_rtx (V8SImode);
44259 t3 = gen_reg_rtx (V4DImode);
44260 t4 = gen_reg_rtx (V4DImode);
44261 t5 = gen_reg_rtx (V4DImode);
44263 /* Shuffle the lanes around into
44264 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
44265 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0),
44266 gen_lowpart (V4DImode, d->op1),
44267 GEN_INT (0x20)));
44268 emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0),
44269 gen_lowpart (V4DImode, d->op1),
44270 GEN_INT (0x31)));
44272 /* Swap the 2nd and 3rd position in each lane into
44273 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
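/* The pshufd immediate 2*4 + 1*16 + 3*64 == 0xd8 encodes the
   per-lane element order { 0, 2, 1, 3 }. */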
44274 emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3),
44275 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
44276 emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4),
44277 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
44279 /* Now a vpunpck[lh]qdq will produce
44280 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
44281 if (odd)
44282 t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1),
44283 gen_lowpart (V4DImode, t2));
44284 else
44285 t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1),
44286 gen_lowpart (V4DImode, t2));
44287 emit_insn (t3);
44288 emit_move_insn (d->target, gen_lowpart (V8SImode, t5));
44289 break;
44291 default:
44292 gcc_unreachable ();
44295 return true;
44298 /* A subroutine of ix86_expand_vec_perm_const_1. Pattern match
44299 extract-even and extract-odd permutations. */
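/* E.g. for V8SImode the extract-even selector is
   { 0, 2, 4, 6, 8, 10, 12, 14 } and the extract-odd selector is
   { 1, 3, 5, 7, 9, 11, 13, 15 }. */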
44301 static bool
44302 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
44304 unsigned i, odd, nelt = d->nelt;
44306 odd = d->perm[0];
44307 if (odd != 0 && odd != 1)
44308 return false;
44310 for (i = 1; i < nelt; ++i)
44311 if (d->perm[i] != 2 * i + odd)
44312 return false;
44314 return expand_vec_perm_even_odd_1 (d, odd);
44317 /* Implement broadcast permutations. We assume that expand_vec_perm_1
44318 has already failed. */
44320 static bool
44321 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
44323 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
44324 enum machine_mode vmode = d->vmode;
44325 unsigned char perm2[4];
44326 rtx op0 = d->op0, dest;
44327 bool ok;
44329 switch (vmode)
44331 case V4DFmode:
44332 case V8SFmode:
44333 /* These are special-cased in sse.md so that we can optionally
44334 use the vbroadcast instruction. They expand to two insns
44335 if the input happens to be in a register. */
44336 gcc_unreachable ();
44338 case V2DFmode:
44339 case V2DImode:
44340 case V4SFmode:
44341 case V4SImode:
44342 /* These are always implementable using standard shuffle patterns. */
44343 gcc_unreachable ();
44345 case V8HImode:
44346 case V16QImode:
44347 /* These can be implemented via interleave. We save one insn by
44348 stopping once we have promoted to V4SImode and then use pshufd. */
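/* For instance, broadcasting element 3 of a V8HImode vector:
   vpunpcklwd of the operand with itself gives { 0 0 1 1 2 2 3 3 };
   viewed as V4SImode, the two copies of element 3 form SImode element 3,
   which the final pshufd then replicates across the vector. */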
44349 if (d->testing_p)
44350 return true;
44353 rtx dest;
44354 rtx (*gen) (rtx, rtx, rtx)
44355 = vmode == V16QImode ? gen_vec_interleave_lowv16qi
44356 : gen_vec_interleave_lowv8hi;
44358 if (elt >= nelt2)
44360 gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
44361 : gen_vec_interleave_highv8hi;
44362 elt -= nelt2;
44364 nelt2 /= 2;
44366 dest = gen_reg_rtx (vmode);
44367 emit_insn (gen (dest, op0, op0));
44368 vmode = get_mode_wider_vector (vmode);
44369 op0 = gen_lowpart (vmode, dest);
44371 while (vmode != V4SImode);
44373 memset (perm2, elt, 4);
44374 dest = gen_reg_rtx (V4SImode);
44375 ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
44376 gcc_assert (ok);
44377 if (!d->testing_p)
44378 emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
44379 return true;
44381 case V32QImode:
44382 case V16HImode:
44383 case V8SImode:
44384 case V4DImode:
44385 /* For AVX2, broadcasts of the first element should have been handled
44386 by expand_vec_perm_1 via vpbroadcast* or vpermq. */
44387 gcc_assert (!TARGET_AVX2 || d->perm[0]);
44388 return false;
44390 default:
44391 gcc_unreachable ();
44395 /* A subroutine of ix86_expand_vec_perm_const_1. Pattern match
44396 broadcast permutations. */
44398 static bool
44399 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
44401 unsigned i, elt, nelt = d->nelt;
44403 if (!d->one_operand_p)
44404 return false;
44406 elt = d->perm[0];
44407 for (i = 1; i < nelt; ++i)
44408 if (d->perm[i] != elt)
44409 return false;
44411 return expand_vec_perm_broadcast_1 (d);
44414 /* Implement arbitrary permutation of two V32QImode or V16HImode operands
44415 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
44416 all the shorter instruction sequences. */
44418 static bool
44419 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
44421 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
44422 unsigned int i, nelt, eltsz;
44423 bool used[4];
44425 if (!TARGET_AVX2
44426 || d->one_operand_p
44427 || (d->vmode != V32QImode && d->vmode != V16HImode))
44428 return false;
44430 if (d->testing_p)
44431 return true;
44433 nelt = d->nelt;
44434 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
44436 /* Generate 4 permutation masks. If the required element is within
44437 the same lane, it is shuffled in. If the required element is from the
44438 other lane, force a zero by setting bit 7 in the permutation mask.
44439 In the other mask, the elements are non-negative if the element
44440 is requested from the other lane but also moved to the other lane,
44441 so that the result of vpshufb can have the two V2TImode halves
44442 swapped. */
44443 m128 = GEN_INT (-128);
44444 for (i = 0; i < 32; ++i)
44446 rperm[0][i] = m128;
44447 rperm[1][i] = m128;
44448 rperm[2][i] = m128;
44449 rperm[3][i] = m128;
44451 used[0] = false;
44452 used[1] = false;
44453 used[2] = false;
44454 used[3] = false;
44455 for (i = 0; i < nelt; ++i)
44457 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
44458 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
44459 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
44461 for (j = 0; j < eltsz; ++j)
44462 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
44463 used[which] = true;
44466 for (i = 0; i < 2; ++i)
44468 if (!used[2 * i + 1])
44470 h[i] = NULL_RTX;
44471 continue;
44473 vperm = gen_rtx_CONST_VECTOR (V32QImode,
44474 gen_rtvec_v (32, rperm[2 * i + 1]));
44475 vperm = force_reg (V32QImode, vperm);
44476 h[i] = gen_reg_rtx (V32QImode);
44477 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
44478 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
44481 /* Swap the 128-bit lanes of h[X]. */
44482 for (i = 0; i < 2; ++i)
44484 if (h[i] == NULL_RTX)
44485 continue;
44486 op = gen_reg_rtx (V4DImode);
44487 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
44488 const2_rtx, GEN_INT (3), const0_rtx,
44489 const1_rtx));
44490 h[i] = gen_lowpart (V32QImode, op);
44493 for (i = 0; i < 2; ++i)
44495 if (!used[2 * i])
44497 l[i] = NULL_RTX;
44498 continue;
44500 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
44501 vperm = force_reg (V32QImode, vperm);
44502 l[i] = gen_reg_rtx (V32QImode);
44503 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
44504 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
44507 for (i = 0; i < 2; ++i)
44509 if (h[i] && l[i])
44511 op = gen_reg_rtx (V32QImode);
44512 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
44513 l[i] = op;
44515 else if (h[i])
44516 l[i] = h[i];
44519 gcc_assert (l[0] && l[1]);
44520 op = d->target;
44521 if (d->vmode != V32QImode)
44522 op = gen_reg_rtx (V32QImode);
44523 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
44524 if (op != d->target)
44525 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
44526 return true;
44529 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
44530 With all of the interface bits taken care of, perform the expansion
44531 in D and return true on success. */
44533 static bool
44534 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
44536 /* Try a single instruction expansion. */
44537 if (expand_vec_perm_1 (d))
44538 return true;
44540 /* Try sequences of two instructions. */
44542 if (expand_vec_perm_pshuflw_pshufhw (d))
44543 return true;
44545 if (expand_vec_perm_palignr (d))
44546 return true;
44548 if (expand_vec_perm_interleave2 (d))
44549 return true;
44551 if (expand_vec_perm_broadcast (d))
44552 return true;
44554 if (expand_vec_perm_vpermq_perm_1 (d))
44555 return true;
44557 if (expand_vec_perm_vperm2f128 (d))
44558 return true;
44560 /* Try sequences of three instructions. */
44562 if (expand_vec_perm_2vperm2f128_vshuf (d))
44563 return true;
44565 if (expand_vec_perm_pshufb2 (d))
44566 return true;
44568 if (expand_vec_perm_interleave3 (d))
44569 return true;
44571 if (expand_vec_perm_vperm2f128_vblend (d))
44572 return true;
44574 /* Try sequences of four instructions. */
44576 if (expand_vec_perm_vpshufb2_vpermq (d))
44577 return true;
44579 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
44580 return true;
44582 /* ??? Look for narrow permutations whose element orderings would
44583 allow the promotion to a wider mode. */
44585 /* ??? Look for sequences of interleave or a wider permute that place
44586 the data into the correct lanes for a half-vector shuffle like
44587 pshuf[lh]w or vpermilps. */
44589 /* ??? Look for sequences of interleave that produce the desired results.
44590 The combinatorics of punpck[lh] get pretty ugly... */
44592 if (expand_vec_perm_even_odd (d))
44593 return true;
44595 /* Even longer sequences. */
44596 if (expand_vec_perm_vpshufb4_vpermq2 (d))
44597 return true;
44599 return false;
44602 /* If a permutation only uses one operand, make that explicit. Returns true
44603 if the permutation references both operands. */
44605 static bool
44606 canonicalize_perm (struct expand_vec_perm_d *d)
44608 int i, which, nelt = d->nelt;
44610 for (i = which = 0; i < nelt; ++i)
44611 which |= (d->perm[i] < nelt ? 1 : 2);
44613 d->one_operand_p = true;
44614 switch (which)
44616 default:
44617 gcc_unreachable();
44619 case 3:
44620 if (!rtx_equal_p (d->op0, d->op1))
44622 d->one_operand_p = false;
44623 break;
44625 /* The elements of PERM do not suggest that only the first operand
44626 is used, but both operands are identical. Allow easier matching
44627 of the permutation by folding the permutation into the single
44628 input vector. */
44629 /* FALLTHRU */
44631 case 2:
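/* E.g. a V4SImode selector { 4, 5, 6, 7 } only reads the second
   operand and is folded to { 0, 1, 2, 3 } on d->op0 (the old d->op1). */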
44632 for (i = 0; i < nelt; ++i)
44633 d->perm[i] &= nelt - 1;
44634 d->op0 = d->op1;
44635 break;
44637 case 1:
44638 d->op1 = d->op0;
44639 break;
44642 return (which == 3);
44645 bool
44646 ix86_expand_vec_perm_const (rtx operands[4])
44648 struct expand_vec_perm_d d;
44649 unsigned char perm[MAX_VECT_LEN];
44650 int i, nelt;
44651 bool two_args;
44652 rtx sel;
44654 d.target = operands[0];
44655 d.op0 = operands[1];
44656 d.op1 = operands[2];
44657 sel = operands[3];
44659 d.vmode = GET_MODE (d.target);
44660 gcc_assert (VECTOR_MODE_P (d.vmode));
44661 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44662 d.testing_p = false;
44664 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
44665 gcc_assert (XVECLEN (sel, 0) == nelt);
44666 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
44668 for (i = 0; i < nelt; ++i)
44670 rtx e = XVECEXP (sel, 0, i);
44671 int ei = INTVAL (e) & (2 * nelt - 1);
44672 d.perm[i] = ei;
44673 perm[i] = ei;
44676 two_args = canonicalize_perm (&d);
44678 if (ix86_expand_vec_perm_const_1 (&d))
44679 return true;
44681 /* If the selector says both arguments are needed, but the operands are the
44682 same, the above tried to expand with one_operand_p and flattened selector.
44683 If that didn't work, retry without one_operand_p; we succeeded with that
44684 during testing. */
44685 if (two_args && d.one_operand_p)
44687 d.one_operand_p = false;
44688 memcpy (d.perm, perm, sizeof (perm));
44689 return ix86_expand_vec_perm_const_1 (&d);
44692 return false;
44695 /* Implement targetm.vectorize.vec_perm_const_ok. */
44697 static bool
44698 ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode,
44699 const unsigned char *sel)
44701 struct expand_vec_perm_d d;
44702 unsigned int i, nelt, which;
44703 bool ret;
44705 d.vmode = vmode;
44706 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44707 d.testing_p = true;
44709 /* Given sufficient ISA support we can just return true here
44710 for selected vector modes. */
44711 if (d.vmode == V16SImode || d.vmode == V16SFmode
44712 || d.vmode == V8DFmode || d.vmode == V8DImode)
44713 /* All implementable with a single vpermi2 insn. */
44714 return true;
44715 if (GET_MODE_SIZE (d.vmode) == 16)
44717 /* All implementable with a single vpperm insn. */
44718 if (TARGET_XOP)
44719 return true;
44720 /* All implementable with 2 pshufb + 1 ior. */
44721 if (TARGET_SSSE3)
44722 return true;
44723 /* All implementable with shufpd or unpck[lh]pd. */
44724 if (d.nelt == 2)
44725 return true;
44728 /* Extract the values from the vector CST into the permutation
44729 array in D. */
44730 memcpy (d.perm, sel, nelt);
44731 for (i = which = 0; i < nelt; ++i)
44733 unsigned char e = d.perm[i];
44734 gcc_assert (e < 2 * nelt);
44735 which |= (e < nelt ? 1 : 2);
44738 /* If all elements are from the second vector, fold them to the first. */
44739 if (which == 2)
44740 for (i = 0; i < nelt; ++i)
44741 d.perm[i] -= nelt;
44743 /* Check whether the mask can be applied to the vector type. */
44744 d.one_operand_p = (which != 3);
44746 /* Implementable with shufps or pshufd. */
44747 if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
44748 return true;
44750 /* Otherwise we have to go through the motions and see if we can
44751 figure out how to generate the requested permutation. */
44752 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
44753 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
44754 if (!d.one_operand_p)
44755 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
44757 start_sequence ();
44758 ret = ix86_expand_vec_perm_const_1 (&d);
44759 end_sequence ();
44761 return ret;
44764 void
44765 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
44767 struct expand_vec_perm_d d;
44768 unsigned i, nelt;
44770 d.target = targ;
44771 d.op0 = op0;
44772 d.op1 = op1;
44773 d.vmode = GET_MODE (targ);
44774 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44775 d.one_operand_p = false;
44776 d.testing_p = false;
44778 for (i = 0; i < nelt; ++i)
44779 d.perm[i] = i * 2 + odd;
44781 /* We'll either be able to implement the permutation directly... */
44782 if (expand_vec_perm_1 (&d))
44783 return;
44785 /* ... or we use the special-case patterns. */
44786 expand_vec_perm_even_odd_1 (&d, odd);
44789 static void
44790 ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
44792 struct expand_vec_perm_d d;
44793 unsigned i, nelt, base;
44794 bool ok;
44796 d.target = targ;
44797 d.op0 = op0;
44798 d.op1 = op1;
44799 d.vmode = GET_MODE (targ);
44800 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44801 d.one_operand_p = false;
44802 d.testing_p = false;
44804 base = high_p ? nelt / 2 : 0;
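/* E.g. for V4SImode with HIGH_P set, the loop below builds the selector
   { 2, 6, 3, 7 }, i.e. the punpckhdq element order. */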
44805 for (i = 0; i < nelt / 2; ++i)
44807 d.perm[i * 2] = i + base;
44808 d.perm[i * 2 + 1] = i + base + nelt;
44811 /* Note that for AVX this isn't one instruction. */
44812 ok = ix86_expand_vec_perm_const_1 (&d);
44813 gcc_assert (ok);
44817 /* Expand a vector operation CODE for a V*QImode in terms of the
44818 same operation on V*HImode. */
44820 void
44821 ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
44823 enum machine_mode qimode = GET_MODE (dest);
44824 enum machine_mode himode;
44825 rtx (*gen_il) (rtx, rtx, rtx);
44826 rtx (*gen_ih) (rtx, rtx, rtx);
44827 rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
44828 struct expand_vec_perm_d d;
44829 bool ok, full_interleave;
44830 bool uns_p = false;
44831 int i;
44833 switch (qimode)
44835 case V16QImode:
44836 himode = V8HImode;
44837 gen_il = gen_vec_interleave_lowv16qi;
44838 gen_ih = gen_vec_interleave_highv16qi;
44839 break;
44840 case V32QImode:
44841 himode = V16HImode;
44842 gen_il = gen_avx2_interleave_lowv32qi;
44843 gen_ih = gen_avx2_interleave_highv32qi;
44844 break;
44845 default:
44846 gcc_unreachable ();
44849 op2_l = op2_h = op2;
44850 switch (code)
44852 case MULT:
44853 /* Unpack data such that we've got a source byte in each low byte of
44854 each word. We don't care what goes into the high byte of each word.
44855 Rather than trying to get zero in there, most convenient is to let
44856 it be a copy of the low byte. */
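/* This is safe because the low byte of each HImode product depends
   only on the low bytes of the factors, so whatever is copied into
   the high bytes cannot disturb the bytes extracted below. */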
44857 op2_l = gen_reg_rtx (qimode);
44858 op2_h = gen_reg_rtx (qimode);
44859 emit_insn (gen_il (op2_l, op2, op2));
44860 emit_insn (gen_ih (op2_h, op2, op2));
44861 /* FALLTHRU */
44863 op1_l = gen_reg_rtx (qimode);
44864 op1_h = gen_reg_rtx (qimode);
44865 emit_insn (gen_il (op1_l, op1, op1));
44866 emit_insn (gen_ih (op1_h, op1, op1));
44867 full_interleave = qimode == V16QImode;
44868 break;
44870 case ASHIFT:
44871 case LSHIFTRT:
44872 uns_p = true;
44873 /* FALLTHRU */
44874 case ASHIFTRT:
44875 op1_l = gen_reg_rtx (himode);
44876 op1_h = gen_reg_rtx (himode);
44877 ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
44878 ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
44879 full_interleave = true;
44880 break;
44881 default:
44882 gcc_unreachable ();
44885 /* Perform the operation. */
44886 res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
44887 1, OPTAB_DIRECT);
44888 res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
44889 1, OPTAB_DIRECT);
44890 gcc_assert (res_l && res_h);
44892 /* Merge the data back into the right place. */
44893 d.target = dest;
44894 d.op0 = gen_lowpart (qimode, res_l);
44895 d.op1 = gen_lowpart (qimode, res_h);
44896 d.vmode = qimode;
44897 d.nelt = GET_MODE_NUNITS (qimode);
44898 d.one_operand_p = false;
44899 d.testing_p = false;
44901 if (full_interleave)
44903 /* For SSE2, we used a full interleave, so the desired
44904 results are in the even elements. */
44905 for (i = 0; i < 32; ++i)
44906 d.perm[i] = i * 2;
44908 else
44910 /* For AVX, the interleave used above was not cross-lane. So the
44911 extraction is evens but with the second and third quarters swapped.
44912 Happily, that is even one insn shorter than even extraction. */
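/* Concretely, for V32QImode: result bytes 0-7 take the even bytes of
   the low lane of res_l, bytes 8-15 the even bytes of the low lane of
   res_h, bytes 16-23 those of the high lane of res_l, and bytes 24-31
   those of the high lane of res_h. */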
44913 for (i = 0; i < 32; ++i)
44914 d.perm[i] = i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0);
44917 ok = ix86_expand_vec_perm_const_1 (&d);
44918 gcc_assert (ok);
44920 set_unique_reg_note (get_last_insn (), REG_EQUAL,
44921 gen_rtx_fmt_ee (code, qimode, op1, op2));
44924 /* Helper function of ix86_expand_mul_widen_evenodd. Return true
44925 if op is CONST_VECTOR with all odd elements equal to their
44926 preceding element. */
44928 static bool
44929 const_vector_equal_evenodd_p (rtx op)
44931 enum machine_mode mode = GET_MODE (op);
44932 int i, nunits = GET_MODE_NUNITS (mode);
44933 if (GET_CODE (op) != CONST_VECTOR
44934 || nunits != CONST_VECTOR_NUNITS (op))
44935 return false;
44936 for (i = 0; i < nunits; i += 2)
44937 if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
44938 return false;
44939 return true;
44942 void
44943 ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
44944 bool uns_p, bool odd_p)
44946 enum machine_mode mode = GET_MODE (op1);
44947 enum machine_mode wmode = GET_MODE (dest);
44948 rtx x;
44949 rtx orig_op1 = op1, orig_op2 = op2;
44951 if (!nonimmediate_operand (op1, mode))
44952 op1 = force_reg (mode, op1);
44953 if (!nonimmediate_operand (op2, mode))
44954 op2 = force_reg (mode, op2);
44956 /* We only play even/odd games with vectors of SImode. */
44957 gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode);
44959 /* If we're looking for the odd results, shift those members down to
44960 the even slots. For some cpus this is faster than a PSHUFD. */
44961 if (odd_p)
44963 /* For XOP use vpmacsdqh, but only for smult, as it is only
44964 signed. */
44965 if (TARGET_XOP && mode == V4SImode && !uns_p)
44967 x = force_reg (wmode, CONST0_RTX (wmode));
44968 emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
44969 return;
44972 x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
44973 if (!const_vector_equal_evenodd_p (orig_op1))
44974 op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
44975 x, NULL, 1, OPTAB_DIRECT);
44976 if (!const_vector_equal_evenodd_p (orig_op2))
44977 op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
44978 x, NULL, 1, OPTAB_DIRECT);
44979 op1 = gen_lowpart (mode, op1);
44980 op2 = gen_lowpart (mode, op2);
44983 if (mode == V16SImode)
44985 if (uns_p)
44986 x = gen_vec_widen_umult_even_v16si (dest, op1, op2);
44987 else
44988 x = gen_vec_widen_smult_even_v16si (dest, op1, op2);
44990 else if (mode == V8SImode)
44992 if (uns_p)
44993 x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
44994 else
44995 x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
44997 else if (uns_p)
44998 x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
44999 else if (TARGET_SSE4_1)
45000 x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
45001 else
45003 rtx s1, s2, t0, t1, t2;
45005 /* The easiest way to implement this without PMULDQ is to go through
45006 the motions as if we are performing a full 64-bit multiply, except
45007 that we need to do less shuffling of the elements. */
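/* Illustrative identity (a sketch; all arithmetic taken mod 2^64):
   for 32-bit a and b,
     (int64_t) a * b
       == (uint64_t) (uint32_t) a * (uint32_t) b
          + ((uint64_t) (uint32_t) ((a < 0 ? -b : 0) + (b < 0 ? -a : 0)) << 32).
   The comparisons s1 and s2 below materialize the a < 0 resp. b < 0
   masks that feed this high-part correction. */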
45009 /* Compute the sign-extension, aka highparts, of the two operands. */
45010 s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
45011 op1, pc_rtx, pc_rtx);
45012 s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
45013 op2, pc_rtx, pc_rtx);
45015 /* Multiply LO(A) * HI(B), and vice-versa. */
45016 t1 = gen_reg_rtx (wmode);
45017 t2 = gen_reg_rtx (wmode);
45018 emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
45019 emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));
45021 /* Multiply LO(A) * LO(B). */
45022 t0 = gen_reg_rtx (wmode);
45023 emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));
45025 /* Combine and shift the highparts into place. */
45026 t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
45027 t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
45028 1, OPTAB_DIRECT);
45030 /* Combine high and low parts. */
45031 force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
45032 return;
45034 emit_insn (x);
45037 void
45038 ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
45039 bool uns_p, bool high_p)
45041 enum machine_mode wmode = GET_MODE (dest);
45042 enum machine_mode mode = GET_MODE (op1);
45043 rtx t1, t2, t3, t4, mask;
45045 switch (mode)
45047 case V4SImode:
45048 t1 = gen_reg_rtx (mode);
45049 t2 = gen_reg_rtx (mode);
45050 if (TARGET_XOP && !uns_p)
45052 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
45053 shuffle the elements once so that all elements are in the right
45054 place for immediate use: { A C B D }. */
45055 emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
45056 const1_rtx, GEN_INT (3)));
45057 emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
45058 const1_rtx, GEN_INT (3)));
45060 else
45062 /* Put the elements into place for the multiply. */
45063 ix86_expand_vec_interleave (t1, op1, op1, high_p);
45064 ix86_expand_vec_interleave (t2, op2, op2, high_p);
45065 high_p = false;
45067 ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
45068 break;
45070 case V8SImode:
45071 /* Shuffle the elements between the lanes. After this we
45072 have { A B E F | C D G H } for each operand. */
45073 t1 = gen_reg_rtx (V4DImode);
45074 t2 = gen_reg_rtx (V4DImode);
45075 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
45076 const0_rtx, const2_rtx,
45077 const1_rtx, GEN_INT (3)));
45078 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
45079 const0_rtx, const2_rtx,
45080 const1_rtx, GEN_INT (3)));
45082 /* Shuffle the elements within the lanes. After this we
45083 have { A A B B | C C D D } or { E E F F | G G H H }. */
45084 t3 = gen_reg_rtx (V8SImode);
45085 t4 = gen_reg_rtx (V8SImode);
45086 mask = GEN_INT (high_p
45087 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
45088 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
45089 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
45090 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));
45092 ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
45093 break;
45095 case V8HImode:
45096 case V16HImode:
45097 t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
45098 uns_p, OPTAB_DIRECT);
45099 t2 = expand_binop (mode,
45100 uns_p ? umul_highpart_optab : smul_highpart_optab,
45101 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
45102 gcc_assert (t1 && t2);
45104 t3 = gen_reg_rtx (mode);
45105 ix86_expand_vec_interleave (t3, t1, t2, high_p);
45106 emit_move_insn (dest, gen_lowpart (wmode, t3));
45107 break;
45109 case V16QImode:
45110 case V32QImode:
45111 t1 = gen_reg_rtx (wmode);
45112 t2 = gen_reg_rtx (wmode);
45113 ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
45114 ix86_expand_sse_unpack (t2, op2, uns_p, high_p);
45116 emit_insn (gen_rtx_SET (VOIDmode, dest, gen_rtx_MULT (wmode, t1, t2)));
45117 break;
45119 default:
45120 gcc_unreachable ();
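/* Expand OP0 = OP1 * OP2 element-wise for V4SImode using only SSE2:
   two pmuludq widening multiplies handle the even elements and the
   (shifted-down) odd elements, and the low 32 bits of each product
   are then shuffled back into place. */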
45124 void
45125 ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
45127 rtx res_1, res_2, res_3, res_4;
45129 res_1 = gen_reg_rtx (V4SImode);
45130 res_2 = gen_reg_rtx (V4SImode);
45131 res_3 = gen_reg_rtx (V2DImode);
45132 res_4 = gen_reg_rtx (V2DImode);
45133 ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
45134 ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);
45136 /* Move the results in element 2 down to element 1; we don't care
45137 what goes in elements 2 and 3. Then we can merge the parts
45138 back together with an interleave.
45140 Note that two other sequences were tried:
45141 (1) Use interleaves at the start instead of psrldq, which allows
45142 us to use a single shufps to merge things back at the end.
45143 (2) Use shufps here to combine the two vectors, then pshufd to
45144 put the elements in the correct order.
45145 In both cases the cost of the reformatting stall was too high
45146 and the overall sequence slower. */
45148 emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
45149 const0_rtx, const2_rtx,
45150 const0_rtx, const0_rtx));
45151 emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
45152 const0_rtx, const2_rtx,
45153 const0_rtx, const0_rtx));
45154 res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));
45156 set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
45159 void
45160 ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
45162 enum machine_mode mode = GET_MODE (op0);
45163 rtx t1, t2, t3, t4, t5, t6;
45165 if (TARGET_XOP && mode == V2DImode)
45167 /* op1: A,B,C,D, op2: E,F,G,H */
45168 op1 = gen_lowpart (V4SImode, op1);
45169 op2 = gen_lowpart (V4SImode, op2);
45171 t1 = gen_reg_rtx (V4SImode);
45172 t2 = gen_reg_rtx (V4SImode);
45173 t3 = gen_reg_rtx (V2DImode);
45174 t4 = gen_reg_rtx (V2DImode);
45176 /* t1: B,A,D,C */
45177 emit_insn (gen_sse2_pshufd_1 (t1, op1,
45178 GEN_INT (1),
45179 GEN_INT (0),
45180 GEN_INT (3),
45181 GEN_INT (2)));
45183 /* t2: (B*E),(A*F),(D*G),(C*H) */
45184 emit_insn (gen_mulv4si3 (t2, t1, op2));
45186 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
45187 emit_insn (gen_xop_phadddq (t3, t2));
45189 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
45190 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
45192 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
45193 emit_insn (gen_xop_pmacsdql (op0, op1, op2, t4));
45195 else
45197 enum machine_mode nmode;
45198 rtx (*umul) (rtx, rtx, rtx);
45200 if (mode == V2DImode)
45202 umul = gen_vec_widen_umult_even_v4si;
45203 nmode = V4SImode;
45205 else if (mode == V4DImode)
45207 umul = gen_vec_widen_umult_even_v8si;
45208 nmode = V8SImode;
45210 else if (mode == V8DImode)
45212 umul = gen_vec_widen_umult_even_v16si;
45213 nmode = V16SImode;
45215 else
45216 gcc_unreachable ();
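/* With each 64-bit element split as a = ah * 2^32 + al and
   b = bh * 2^32 + bl, the product modulo 2^64 is
     a * b == al * bl + ((al * bh + ah * bl) << 32);
   the three widening multiplies below compute exactly these
   partial products. */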
45219 /* Multiply low parts. */
45220 t1 = gen_reg_rtx (mode);
45221 emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));
45223 /* Shift input vectors right 32 bits so we can multiply high parts. */
45224 t6 = GEN_INT (32);
45225 t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
45226 t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);
45228 /* Multiply high parts by low parts. */
45229 t4 = gen_reg_rtx (mode);
45230 t5 = gen_reg_rtx (mode);
45231 emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
45232 emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));
45234 /* Combine and shift the highparts back. */
45235 t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
45236 t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);
45238 /* Combine high and low parts. */
45239 force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
45242 set_unique_reg_note (get_last_insn (), REG_EQUAL,
45243 gen_rtx_MULT (mode, op1, op2));
45246 /* Calculate integer abs() using only SSE2 instructions. */
45248 void
45249 ix86_expand_sse2_abs (rtx target, rtx input)
45251 enum machine_mode mode = GET_MODE (target);
45252 rtx tmp0, tmp1, x;
45254 switch (mode)
45256 /* For 32-bit signed integer X, the best way to calculate the absolute
45257 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). */
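/* E.g. for X = -5 and W = 32: X >> 31 == -1, (-1 ^ -5) == 4,
   and 4 - (-1) == 5. */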
45258 case V4SImode:
45259 tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
45260 GEN_INT (GET_MODE_BITSIZE
45261 (GET_MODE_INNER (mode)) - 1),
45262 NULL, 0, OPTAB_DIRECT);
45263 tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
45264 NULL, 0, OPTAB_DIRECT);
45265 x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
45266 target, 0, OPTAB_DIRECT);
45267 break;
45269 /* For 16-bit signed integer X, the best way to calculate the absolute
45270 value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */
45271 case V8HImode:
45272 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
45274 x = expand_simple_binop (mode, SMAX, tmp0, input,
45275 target, 0, OPTAB_DIRECT);
45276 break;
45278 /* For 8-bit signed integer X, the best way to calculate the absolute
45279 value of X is min ((unsigned char) X, (unsigned char) (-X)),
45280 as SSE2 provides the PMINUB insn. */
45281 case V16QImode:
45282 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
45284 x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
45285 target, 0, OPTAB_DIRECT);
45286 break;
45288 default:
45289 gcc_unreachable ();
45292 if (x != target)
45293 emit_move_insn (target, x);
45296 /* Expand an insert into a vector register through pinsr insn.
45297 Return true if successful. */
45299 bool
45300 ix86_expand_pinsr (rtx *operands)
45302 rtx dst = operands[0];
45303 rtx src = operands[3];
45305 unsigned int size = INTVAL (operands[1]);
45306 unsigned int pos = INTVAL (operands[2]);
45308 if (GET_CODE (dst) == SUBREG)
45310 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
45311 dst = SUBREG_REG (dst);
45314 if (GET_CODE (src) == SUBREG)
45315 src = SUBREG_REG (src);
45317 switch (GET_MODE (dst))
45319 case V16QImode:
45320 case V8HImode:
45321 case V4SImode:
45322 case V2DImode:
45324 enum machine_mode srcmode, dstmode;
45325 rtx (*pinsr)(rtx, rtx, rtx, rtx);
45327 srcmode = mode_for_size (size, MODE_INT, 0);
45329 switch (srcmode)
45331 case QImode:
45332 if (!TARGET_SSE4_1)
45333 return false;
45334 dstmode = V16QImode;
45335 pinsr = gen_sse4_1_pinsrb;
45336 break;
45338 case HImode:
45339 if (!TARGET_SSE2)
45340 return false;
45341 dstmode = V8HImode;
45342 pinsr = gen_sse2_pinsrw;
45343 break;
45345 case SImode:
45346 if (!TARGET_SSE4_1)
45347 return false;
45348 dstmode = V4SImode;
45349 pinsr = gen_sse4_1_pinsrd;
45350 break;
45352 case DImode:
45353 gcc_assert (TARGET_64BIT);
45354 if (!TARGET_SSE4_1)
45355 return false;
45356 dstmode = V2DImode;
45357 pinsr = gen_sse4_1_pinsrq;
45358 break;
45360 default:
45361 return false;
45364 rtx d = dst;
45365 if (GET_MODE (dst) != dstmode)
45366 d = gen_reg_rtx (dstmode);
45367 src = gen_lowpart (srcmode, src);
45369 pos /= size;
45371 emit_insn (pinsr (d, gen_lowpart (dstmode, dst), src,
45372 GEN_INT (1 << pos)));
45373 if (d != dst)
45374 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
45375 return true;
45378 default:
45379 return false;
45383 /* This function returns the calling-ABI-specific va_list type node,
45384 i.e. the va_list type appropriate for FNDECL. */
45386 static tree
45387 ix86_fn_abi_va_list (tree fndecl)
45389 if (!TARGET_64BIT)
45390 return va_list_type_node;
45391 gcc_assert (fndecl != NULL_TREE);
45393 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
45394 return ms_va_list_type_node;
45395 else
45396 return sysv_va_list_type_node;
45399 /* Returns the canonical va_list type specified by TYPE. If there
45400 is no valid TYPE provided, it returns NULL_TREE. */
45402 static tree
45403 ix86_canonical_va_list_type (tree type)
45405 tree wtype, htype;
45407 /* Resolve references and pointers to va_list type. */
45408 if (TREE_CODE (type) == MEM_REF)
45409 type = TREE_TYPE (type);
45410 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
45411 type = TREE_TYPE (type);
45412 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
45413 type = TREE_TYPE (type);
45415 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
45417 wtype = va_list_type_node;
45418 gcc_assert (wtype != NULL_TREE);
45419 htype = type;
45420 if (TREE_CODE (wtype) == ARRAY_TYPE)
45422 /* If va_list is an array type, the argument may have decayed
45423 to a pointer type, e.g. by being passed to another function.
45424 In that case, unwrap both types so that we can compare the
45425 underlying records. */
45426 if (TREE_CODE (htype) == ARRAY_TYPE
45427 || POINTER_TYPE_P (htype))
45429 wtype = TREE_TYPE (wtype);
45430 htype = TREE_TYPE (htype);
45433 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
45434 return va_list_type_node;
45435 wtype = sysv_va_list_type_node;
45436 gcc_assert (wtype != NULL_TREE);
45437 htype = type;
45438 if (TREE_CODE (wtype) == ARRAY_TYPE)
45440 /* If va_list is an array type, the argument may have decayed
45441 to a pointer type, e.g. by being passed to another function.
45442 In that case, unwrap both types so that we can compare the
45443 underlying records. */
45444 if (TREE_CODE (htype) == ARRAY_TYPE
45445 || POINTER_TYPE_P (htype))
45447 wtype = TREE_TYPE (wtype);
45448 htype = TREE_TYPE (htype);
45451 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
45452 return sysv_va_list_type_node;
45453 wtype = ms_va_list_type_node;
45454 gcc_assert (wtype != NULL_TREE);
45455 htype = type;
45456 if (TREE_CODE (wtype) == ARRAY_TYPE)
45458 /* If va_list is an array type, the argument may have decayed
45459 to a pointer type, e.g. by being passed to another function.
45460 In that case, unwrap both types so that we can compare the
45461 underlying records. */
45462 if (TREE_CODE (htype) == ARRAY_TYPE
45463 || POINTER_TYPE_P (htype))
45465 wtype = TREE_TYPE (wtype);
45466 htype = TREE_TYPE (htype);
45469 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
45470 return ms_va_list_type_node;
45471 return NULL_TREE;
45473 return std_canonical_va_list_type (type);
45476 /* Iterate through the target-specific builtin types for va_list.
45477 IDX denotes the iterator, *PTREE is set to the result type of
45478 the va_list builtin, and *PNAME to its internal type.
45479 Returns zero if there is no element for this index, otherwise
45480 IDX should be increased upon the next call.
45481 Note, do not iterate a base builtin's name like __builtin_va_list.
45482 Used from c_common_nodes_and_builtins. */
45484 static int
45485 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
45487 if (TARGET_64BIT)
45489 switch (idx)
45491 default:
45492 break;
45494 case 0:
45495 *ptree = ms_va_list_type_node;
45496 *pname = "__builtin_ms_va_list";
45497 return 1;
45499 case 1:
45500 *ptree = sysv_va_list_type_node;
45501 *pname = "__builtin_sysv_va_list";
45502 return 1;
45506 return 0;
45509 #undef TARGET_SCHED_DISPATCH
45510 #define TARGET_SCHED_DISPATCH has_dispatch
45511 #undef TARGET_SCHED_DISPATCH_DO
45512 #define TARGET_SCHED_DISPATCH_DO do_dispatch
45513 #undef TARGET_SCHED_REASSOCIATION_WIDTH
45514 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
45515 #undef TARGET_SCHED_REORDER
45516 #define TARGET_SCHED_REORDER ix86_sched_reorder
45517 #undef TARGET_SCHED_ADJUST_PRIORITY
45518 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
45519 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
45520 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
45521 ix86_dependencies_evaluation_hook
45523 /* The size of the dispatch window is the total number of bytes of
45524 object code allowed in a window. */
45525 #define DISPATCH_WINDOW_SIZE 16
45527 /* Number of dispatch windows considered for scheduling. */
45528 #define MAX_DISPATCH_WINDOWS 3
45530 /* Maximum number of instructions in a window. */
45531 #define MAX_INSN 4
45533 /* Maximum number of immediate operands in a window. */
45534 #define MAX_IMM 4
45536 /* Maximum number of immediate bits allowed in a window. */
45537 #define MAX_IMM_SIZE 128
45539 /* Maximum number of 32 bit immediates allowed in a window. */
45540 #define MAX_IMM_32 4
45542 /* Maximum number of 64 bit immediates allowed in a window. */
45543 #define MAX_IMM_64 2
45545 /* Maximum total of loads or prefetches allowed in a window. */
45546 #define MAX_LOAD 2
45548 /* Maximum total of stores allowed in a window. */
45549 #define MAX_STORE 1
45551 #undef BIG
45552 #define BIG 100
45555 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
45556 enum dispatch_group {
45557 disp_no_group = 0,
45558 disp_load,
45559 disp_store,
45560 disp_load_store,
45561 disp_prefetch,
45562 disp_imm,
45563 disp_imm_32,
45564 disp_imm_64,
45565 disp_branch,
45566 disp_cmp,
45567 disp_jcc,
45568 disp_last
45571 /* Number of allowable groups in a dispatch window. It is an array
45572 indexed by the dispatch_group enum. 100 is used as a big number
45573 because the number of these kinds of operations does not have any
45574 effect on the dispatch window, but we still need entries for them in
45575 the table. */
45576 static unsigned int num_allowable_groups[disp_last] = {
45577 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
45580 char group_name[disp_last + 1][16] = {
45581 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
45582 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
45583 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
45586 /* Instruction path. */
45587 enum insn_path {
45588 no_path = 0,
45589 path_single, /* Single micro op. */
45590 path_double, /* Double micro op. */
45591 path_multi, /* Instructions with more than 2 micro ops. */
45592 last_path
45595 /* sched_insn_info describes one instruction scheduled into a dispatch
45596 window: the insn itself together with its dispatch group, path,
45597 byte length and immediate size.
45599 Windows are allocated for each basic block and are linked
45600 together. */
45601 typedef struct sched_insn_info_s {
45602 rtx insn;
45603 enum dispatch_group group;
45604 enum insn_path path;
45605 int byte_len;
45606 int imm_bytes;
45607 } sched_insn_info;
45609 /* Linked list of dispatch windows. This is a two-way list of
45610 dispatch windows of a basic block. It contains information about
45611 the number of uops in the window and the total number of
45612 instructions and of bytes in the object code for this dispatch
45613 window. */
45614 typedef struct dispatch_windows_s {
45615 int num_insn; /* Number of insn in the window. */
45616 int num_uops; /* Number of uops in the window. */
45617 int window_size; /* Number of bytes in the window. */
45618 int window_num; /* Window number, either 0 or 1. */
45619 int num_imm; /* Number of immediates in an insn. */
45620 int num_imm_32; /* Number of 32 bit immediates in an insn. */
45621 int num_imm_64; /* Number of 64 bit immediates in an insn. */
45622 int imm_size; /* Total immediates in the window. */
45623 int num_loads; /* Total memory loads in the window. */
45624 int num_stores; /* Total memory stores in the window. */
45625 int violation; /* Violation exists in window. */
45626 sched_insn_info *window; /* Pointer to the window. */
45627 struct dispatch_windows_s *next;
45628 struct dispatch_windows_s *prev;
45629 } dispatch_windows;
45631 /* Immediate values used in an insn. */
45632 typedef struct imm_info_s
45634 int imm;
45635 int imm32;
45636 int imm64;
45637 } imm_info;
45639 static dispatch_windows *dispatch_window_list;
45640 static dispatch_windows *dispatch_window_list1;
45642 /* Get dispatch group of insn. */
45644 static enum dispatch_group
45645 get_mem_group (rtx insn)
45647 enum attr_memory memory;
45649 if (INSN_CODE (insn) < 0)
45650 return disp_no_group;
45651 memory = get_attr_memory (insn);
45652 if (memory == MEMORY_STORE)
45653 return disp_store;
45655 if (memory == MEMORY_LOAD)
45656 return disp_load;
45658 if (memory == MEMORY_BOTH)
45659 return disp_load_store;
45661 return disp_no_group;
45664 /* Return true if insn is a compare instruction. */
45666 static bool
45667 is_cmp (rtx insn)
45669 enum attr_type type;
45671 type = get_attr_type (insn);
45672 return (type == TYPE_TEST
45673 || type == TYPE_ICMP
45674 || type == TYPE_FCMP
45675 || GET_CODE (PATTERN (insn)) == COMPARE);
45678 /* Return true if a dispatch violation was encountered. */
45680 static bool
45681 dispatch_violation (void)
45683 if (dispatch_window_list->next)
45684 return dispatch_window_list->next->violation;
45685 return dispatch_window_list->violation;
45688 /* Return true if insn is a branch instruction. */
45690 static bool
45691 is_branch (rtx insn)
45693 return (CALL_P (insn) || JUMP_P (insn));
45696 /* Return true if insn is a prefetch instruction. */
45698 static bool
45699 is_prefetch (rtx insn)
45701 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
45704 /* This function initializes a dispatch window and the list container holding a
45705 pointer to the window. */
45707 static void
45708 init_window (int window_num)
45710 int i;
45711 dispatch_windows *new_list;
45713 if (window_num == 0)
45714 new_list = dispatch_window_list;
45715 else
45716 new_list = dispatch_window_list1;
45718 new_list->num_insn = 0;
45719 new_list->num_uops = 0;
45720 new_list->window_size = 0;
45721 new_list->next = NULL;
45722 new_list->prev = NULL;
45723 new_list->window_num = window_num;
45724 new_list->num_imm = 0;
45725 new_list->num_imm_32 = 0;
45726 new_list->num_imm_64 = 0;
45727 new_list->imm_size = 0;
45728 new_list->num_loads = 0;
45729 new_list->num_stores = 0;
45730 new_list->violation = false;
45732 for (i = 0; i < MAX_INSN; i++)
45734 new_list->window[i].insn = NULL;
45735 new_list->window[i].group = disp_no_group;
45736 new_list->window[i].path = no_path;
45737 new_list->window[i].byte_len = 0;
45738 new_list->window[i].imm_bytes = 0;
45740 return;
45743 /* This function allocates and initializes a dispatch window and the
45744 list container holding a pointer to the window. */
45746 static dispatch_windows *
45747 allocate_window (void)
45749 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
45750 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
45752 return new_list;
45755 /* This routine initializes the dispatch scheduling information. It
45756 initiates building dispatch scheduler tables and constructs the
45757 first dispatch window. */
45759 static void
45760 init_dispatch_sched (void)
45762 /* Allocate a dispatch list and a window. */
45763 dispatch_window_list = allocate_window ();
45764 dispatch_window_list1 = allocate_window ();
45765 init_window (0);
45766 init_window (1);
45769 /* This function returns true if a branch is detected. End of a basic block
45770 does not have to be a branch, but here we assume only branches end a
45771 window. */
45773 static bool
45774 is_end_basic_block (enum dispatch_group group)
45776 return group == disp_branch;
45779 /* This function is called when the end of a window processing is reached. */
45781 static void
45782 process_end_window (void)
45784 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
45785 if (dispatch_window_list->next)
45787 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
45788 gcc_assert (dispatch_window_list->window_size
45789 + dispatch_window_list1->window_size <= 48);
45790 init_window (1);
45792 init_window (0);
45795 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
45796 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
45797 for 48 bytes of instructions. Note that these windows are not dispatch
45798 windows whose size is DISPATCH_WINDOW_SIZE. */
45800 static dispatch_windows *
45801 allocate_next_window (int window_num)
45803 if (window_num == 0)
45805 if (dispatch_window_list->next)
45806 init_window (1);
45807 init_window (0);
45808 return dispatch_window_list;
45811 dispatch_window_list->next = dispatch_window_list1;
45812 dispatch_window_list1->prev = dispatch_window_list;
45814 return dispatch_window_list1;
45817 /* Increment the number of immediate operands of an instruction. */
45819 static int
45820 find_constant_1 (rtx *in_rtx, imm_info *imm_values)
45822 if (*in_rtx == 0)
45823 return 0;
45825 switch (GET_CODE (*in_rtx))
45827 case CONST:
45828 case SYMBOL_REF:
45829 case CONST_INT:
45830 (imm_values->imm)++;
45831 if (x86_64_immediate_operand (*in_rtx, SImode))
45832 (imm_values->imm32)++;
45833 else
45834 (imm_values->imm64)++;
45835 break;
45837 case CONST_DOUBLE:
45838 (imm_values->imm)++;
45839 (imm_values->imm64)++;
45840 break;
45842 case CODE_LABEL:
45843 if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
45845 (imm_values->imm)++;
45846 (imm_values->imm32)++;
45848 break;
45850 default:
45851 break;
45854 return 0;
45857 /* Compute number of immediate operands of an instruction. */
45859 static void
45860 find_constant (rtx in_rtx, imm_info *imm_values)
45862 for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
45863 (rtx_function) find_constant_1, (void *) imm_values);
45866 /* Return total size of immediate operands of an instruction along with number
45867 of corresponding immediate operands. It initializes its parameters to zero
45868 before calling FIND_CONSTANT.
45869 INSN is the input instruction. IMM is the total of immediates.
45870 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
45871 bit immediates. */
45873 static int
45874 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
45876 imm_info imm_values = {0, 0, 0};
45878 find_constant (insn, &imm_values);
45879 *imm = imm_values.imm;
45880 *imm32 = imm_values.imm32;
45881 *imm64 = imm_values.imm64;
45882 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
45885 /* This function indicates whether an instruction has any immediate
45886 operand. */
45888 static bool
45889 has_immediate (rtx insn)
45891 int num_imm_operand;
45892 int num_imm32_operand;
45893 int num_imm64_operand;
45895 if (insn)
45896 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
45897 &num_imm64_operand);
45898 return false;
45901 /* Return single or double path for instructions. */
45903 static enum insn_path
45904 get_insn_path (rtx insn)
45906 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
45908 if ((int)path == 0)
45909 return path_single;
45911 if ((int)path == 1)
45912 return path_double;
45914 return path_multi;
45917 /* Return insn dispatch group. */
45919 static enum dispatch_group
45920 get_insn_group (rtx insn)
45922 enum dispatch_group group = get_mem_group (insn);
45923 if (group)
45924 return group;
45926 if (is_branch (insn))
45927 return disp_branch;
45929 if (is_cmp (insn))
45930 return disp_cmp;
45932 if (has_immediate (insn))
45933 return disp_imm;
45935 if (is_prefetch (insn))
45936 return disp_prefetch;
45938 return disp_no_group;
45941 /* Count number of GROUP restricted instructions in a dispatch
45942 window WINDOW_LIST. */
45944 static int
45945 count_num_restricted (rtx insn, dispatch_windows *window_list)
45947 enum dispatch_group group = get_insn_group (insn);
45948 int imm_size;
45949 int num_imm_operand;
45950 int num_imm32_operand;
45951 int num_imm64_operand;
45953 if (group == disp_no_group)
45954 return 0;
45956 if (group == disp_imm)
45958 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
45959 &num_imm64_operand);
45960 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
45961 || num_imm_operand + window_list->num_imm > MAX_IMM
45962 || (num_imm32_operand > 0
45963 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
45964 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
45965 || (num_imm64_operand > 0
45966 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
45967 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
45968 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
45969 && num_imm64_operand > 0
45970 && ((window_list->num_imm_64 > 0
45971 && window_list->num_insn >= 2)
45972 || window_list->num_insn >= 3)))
45973 return BIG;
45975 return 1;
45978 if ((group == disp_load_store
45979 && (window_list->num_loads >= MAX_LOAD
45980 || window_list->num_stores >= MAX_STORE))
45981 || ((group == disp_load
45982 || group == disp_prefetch)
45983 && window_list->num_loads >= MAX_LOAD)
45984 || (group == disp_store
45985 && window_list->num_stores >= MAX_STORE))
45986 return BIG;
45988 return 1;
45991 /* This function returns true if insn satisfies dispatch rules on the
45992 last window scheduled. */
45994 static bool
45995 fits_dispatch_window (rtx insn)
45997 dispatch_windows *window_list = dispatch_window_list;
45998 dispatch_windows *window_list_next = dispatch_window_list->next;
45999 unsigned int num_restrict;
46000 enum dispatch_group group = get_insn_group (insn);
46001 enum insn_path path = get_insn_path (insn);
46002 int sum;
46004 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
46005 instructions should be given the lowest priority in the
46006 scheduling process in the Haifa scheduler to make sure they will be
46007 scheduled in the same dispatch window as the instructions that reference them. */
46008 if (group == disp_jcc || group == disp_cmp)
46009 return false;
46011 /* Check nonrestricted. */
46012 if (group == disp_no_group || group == disp_branch)
46013 return true;
46015 /* Get last dispatch window. */
46016 if (window_list_next)
46017 window_list = window_list_next;
46019 if (window_list->window_num == 1)
46021 sum = window_list->prev->window_size + window_list->window_size;
46023 if (sum == 32
46024 || (min_insn_size (insn) + sum) >= 48)
46025 /* Window 1 is full. Go for next window. */
46026 return true;
46029 num_restrict = count_num_restricted (insn, window_list);
46031 if (num_restrict > num_allowable_groups[group])
46032 return false;
46034 /* See if it fits in the first window. */
46035 if (window_list->window_num == 0)
46037 /* The first window should have only single and double path
46038 uops. */
46039 if (path == path_double
46040 && (window_list->num_uops + 2) > MAX_INSN)
46041 return false;
46042 else if (path != path_single)
46043 return false;
46045 return true;
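/* For example, once windows 0 and 1 together hold 32 bytes (say 16 + 16),
   or adding INSN would push the pair to 48 bytes or more, window 1 is
   treated as full and the insn is reported as fitting, i.e. it will start a
   fresh window pair.  */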
46048 /* Add an instruction INSN with NUM_UOPS micro-operations to the
46049 dispatch window WINDOW_LIST. */
46051 static void
46052 add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
46054 int byte_len = min_insn_size (insn);
46055 int num_insn = window_list->num_insn;
46056 int imm_size;
46057 sched_insn_info *window = window_list->window;
46058 enum dispatch_group group = get_insn_group (insn);
46059 enum insn_path path = get_insn_path (insn);
46060 int num_imm_operand;
46061 int num_imm32_operand;
46062 int num_imm64_operand;
46064 if (!window_list->violation && group != disp_cmp
46065 && !fits_dispatch_window (insn))
46066 window_list->violation = true;
46068 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
46069 &num_imm64_operand);
46071 /* Initialize window with new instruction. */
46072 window[num_insn].insn = insn;
46073 window[num_insn].byte_len = byte_len;
46074 window[num_insn].group = group;
46075 window[num_insn].path = path;
46076 window[num_insn].imm_bytes = imm_size;
46078 window_list->window_size += byte_len;
46079 window_list->num_insn = num_insn + 1;
46080 window_list->num_uops = window_list->num_uops + num_uops;
46081 window_list->imm_size += imm_size;
46082 window_list->num_imm += num_imm_operand;
46083 window_list->num_imm_32 += num_imm32_operand;
46084 window_list->num_imm_64 += num_imm64_operand;
46086 if (group == disp_store)
46087 window_list->num_stores += 1;
46088 else if (group == disp_load
46089 || group == disp_prefetch)
46090 window_list->num_loads += 1;
46091 else if (group == disp_load_store)
46093 window_list->num_stores += 1;
46094 window_list->num_loads += 1;
46098 /* Adds a scheduled instruction, INSN, to the current dispatch window.
46099 If the total byte length of the instructions or the number of instructions
46100 in the window exceeds the allowable maximum, it allocates a new window. */
46102 static void
46103 add_to_dispatch_window (rtx insn)
46105 int byte_len;
46106 dispatch_windows *window_list;
46107 dispatch_windows *next_list;
46108 dispatch_windows *window0_list;
46109 enum insn_path path;
46110 enum dispatch_group insn_group;
46111 bool insn_fits;
46112 int num_insn;
46113 int num_uops;
46114 int window_num;
46115 int insn_num_uops;
46116 int sum;
46118 if (INSN_CODE (insn) < 0)
46119 return;
46121 byte_len = min_insn_size (insn);
46122 window_list = dispatch_window_list;
46123 next_list = window_list->next;
46124 path = get_insn_path (insn);
46125 insn_group = get_insn_group (insn);
46127 /* Get the last dispatch window. */
46128 if (next_list)
46129 window_list = dispatch_window_list->next;
46131 if (path == path_single)
46132 insn_num_uops = 1;
46133 else if (path == path_double)
46134 insn_num_uops = 2;
46135 else
46136 insn_num_uops = (int) path;
46138 /* If the current window is full, get a new window.
46139 Window number zero is full if MAX_INSN uops are scheduled in it.
46140 Window number one is full if window zero's bytes plus window
46141 one's bytes reach 32, if adding the new instruction's bytes would
46142 bring the total to 48 or more, or if it already has MAX_INSN
46143 instructions in it. */
46144 num_insn = window_list->num_insn;
46145 num_uops = window_list->num_uops;
46146 window_num = window_list->window_num;
46147 insn_fits = fits_dispatch_window (insn);
46149 if (num_insn >= MAX_INSN
46150 || num_uops + insn_num_uops > MAX_INSN
46151 || !(insn_fits))
46153 window_num = ~window_num & 1;
46154 window_list = allocate_next_window (window_num);
46157 if (window_num == 0)
46159 add_insn_window (insn, window_list, insn_num_uops);
46160 if (window_list->num_insn >= MAX_INSN
46161 && insn_group == disp_branch)
46163 process_end_window ();
46164 return;
46167 else if (window_num == 1)
46169 window0_list = window_list->prev;
46170 sum = window0_list->window_size + window_list->window_size;
46171 if (sum == 32
46172 || (byte_len + sum) >= 48)
46174 process_end_window ();
46175 window_list = dispatch_window_list;
46178 add_insn_window (insn, window_list, insn_num_uops);
46180 else
46181 gcc_unreachable ();
46183 if (is_end_basic_block (insn_group))
46185 /* End of basic block is reached; do end-basic-block processing. */
46186 process_end_window ();
46187 return;
46191 /* Print the dispatch window, WINDOW_NUM, to FILE. */
46193 DEBUG_FUNCTION static void
46194 debug_dispatch_window_file (FILE *file, int window_num)
46196 dispatch_windows *list;
46197 int i;
46199 if (window_num == 0)
46200 list = dispatch_window_list;
46201 else
46202 list = dispatch_window_list1;
46204 fprintf (file, "Window #%d:\n", list->window_num);
46205 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
46206 list->num_insn, list->num_uops, list->window_size);
46207 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
46208 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
46210 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
46211 list->num_stores);
46212 fprintf (file, " insn info:\n");
46214 for (i = 0; i < MAX_INSN; i++)
46216 if (!list->window[i].insn)
46217 break;
46218 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
46219 i, group_name[list->window[i].group],
46220 i, (void *)list->window[i].insn,
46221 i, list->window[i].path,
46222 i, list->window[i].byte_len,
46223 i, list->window[i].imm_bytes);
46227 /* Print to stdout a dispatch window. */
46229 DEBUG_FUNCTION void
46230 debug_dispatch_window (int window_num)
46232 debug_dispatch_window_file (stdout, window_num);
46235 /* Print INSN dispatch information to FILE. */
46237 DEBUG_FUNCTION static void
46238 debug_insn_dispatch_info_file (FILE *file, rtx insn)
46240 int byte_len;
46241 enum insn_path path;
46242 enum dispatch_group group;
46243 int imm_size;
46244 int num_imm_operand;
46245 int num_imm32_operand;
46246 int num_imm64_operand;
46248 if (INSN_CODE (insn) < 0)
46249 return;
46251 byte_len = min_insn_size (insn);
46252 path = get_insn_path (insn);
46253 group = get_insn_group (insn);
46254 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
46255 &num_imm64_operand);
46257 fprintf (file, " insn info:\n");
46258 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
46259 group_name[group], path, byte_len);
46260 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
46261 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
46264 /* Print to stdout the status of the ready list with respect to
46265 dispatch windows. */
46267 DEBUG_FUNCTION void
46268 debug_ready_dispatch (void)
46270 int i;
46271 int no_ready = number_in_ready ();
46273 fprintf (stdout, "Number of ready: %d\n", no_ready);
46275 for (i = 0; i < no_ready; i++)
46276 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
46279 /* This routine is the driver of the dispatch scheduler. */
46281 static void
46282 do_dispatch (rtx insn, int mode)
46284 if (mode == DISPATCH_INIT)
46285 init_dispatch_sched ();
46286 else if (mode == ADD_TO_DISPATCH_WINDOW)
46287 add_to_dispatch_window (insn);
46290 /* Answer dispatch-scheduling query ACTION for INSN; return FALSE unless dispatch scheduling is enabled for the current target. */
46292 static bool
46293 has_dispatch (rtx insn, int action)
46295 if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3 || TARGET_BDVER4)
46296 && flag_dispatch_scheduler)
46297 switch (action)
46299 default:
46300 return false;
46302 case IS_DISPATCH_ON:
46303 return true;
46304 break;
46306 case IS_CMP:
46307 return is_cmp (insn);
46309 case DISPATCH_VIOLATION:
46310 return dispatch_violation ();
46312 case FITS_DISPATCH_WINDOW:
46313 return fits_dispatch_window (insn);
46316 return false;
46319 /* Implementation of the reassociation_width target hook used by
46320 the reassoc phase to identify the parallelism level in a reassociated
46321 tree. The statement's tree code is passed in OPC. The arguments'
46322 type is passed in MODE.
46324 Currently parallel reassociation is enabled for Atom
46325 processors only, and we set the reassociation width to 2
46326 because Atom may issue up to 2 instructions per cycle.
46328 The return value should be adjusted if parallel reassociation is
46329 enabled for other processors. */
46331 static int
46332 ix86_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
46333 enum machine_mode mode)
46335 int res = 1;
46337 if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
46338 res = 2;
46339 else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
46340 res = 2;
46342 return res;
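/* A width of 2 lets the reassoc pass rebalance a chain such as
   ((a + b) + c) + d into (a + b) + (c + d), exposing two independent
   additions that Atom can issue in the same cycle.  */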
46345 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
46346 place emms and femms instructions. */
46348 static enum machine_mode
46349 ix86_preferred_simd_mode (enum machine_mode mode)
46351 if (!TARGET_SSE)
46352 return word_mode;
46354 switch (mode)
46356 case QImode:
46357 return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
46358 case HImode:
46359 return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
46360 case SImode:
46361 return TARGET_AVX512F ? V16SImode :
46362 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
46363 case DImode:
46364 return TARGET_AVX512F ? V8DImode :
46365 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;
46367 case SFmode:
46368 if (TARGET_AVX512F)
46369 return V16SFmode;
46370 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
46371 return V8SFmode;
46372 else
46373 return V4SFmode;
46375 case DFmode:
46376 if (!TARGET_VECTORIZE_DOUBLE)
46377 return word_mode;
46378 else if (TARGET_AVX512F)
46379 return V8DFmode;
46380 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
46381 return V4DFmode;
46382 else if (TARGET_SSE2)
46383 return V2DFmode;
46384 /* FALLTHRU */
46386 default:
46387 return word_mode;
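/* For example, a loop over 'float' (SFmode) is vectorized with V16SFmode
   when AVX-512F is enabled, V8SFmode with AVX (unless 128-bit vectors are
   preferred), and V4SFmode otherwise.  */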
46391 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
46392 vectors. If AVX512F is enabled then try vectorizing with 512bit,
46393 256bit and 128bit vectors. */
46395 static unsigned int
46396 ix86_autovectorize_vector_sizes (void)
46398 return TARGET_AVX512F ? 64 | 32 | 16 :
46399 (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
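/* The returned value is a bit mask of vector sizes in bytes, so with
   AVX-512F enabled this is 64 | 32 | 16 == 112, telling the vectorizer to
   try 64-, 32- and 16-byte vectors in turn.  */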
46404 /* Return the class of registers which could be used for a pseudo of MODE
46405 and of class RCLASS for spilling instead of memory. Return NO_REGS
46406 if it is not possible or not profitable. */
46407 static reg_class_t
46408 ix86_spill_class (reg_class_t rclass, enum machine_mode mode)
46410 if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL && ! TARGET_MMX
46411 && (mode == SImode || (TARGET_64BIT && mode == DImode))
46412 && INTEGER_CLASS_P (rclass))
46413 return ALL_SSE_REGS;
46414 return NO_REGS;
46417 /* Implement targetm.vectorize.init_cost. */
46419 static void *
46420 ix86_init_cost (struct loop *loop_info ATTRIBUTE_UNUSED)
46422 unsigned *cost = XNEWVEC (unsigned, 3);
46423 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
46424 return cost;
46427 /* Implement targetm.vectorize.add_stmt_cost. */
46429 static unsigned
46430 ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
46431 struct _stmt_vec_info *stmt_info, int misalign,
46432 enum vect_cost_model_location where)
46434 unsigned *cost = (unsigned *) data;
46435 unsigned retval = 0;
46437 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
46438 int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
46440 /* Statements in an inner loop relative to the loop being
46441 vectorized are weighted more heavily. The value here is
46442 arbitrary and could potentially be improved with analysis. */
46443 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
46444 count *= 50; /* FIXME. */
46446 retval = (unsigned) (count * stmt_cost);
46448 /* We need to multiply all vector stmt costs by 1.7 (estimated cost)
46449 for Silvermont, as it has an out-of-order integer pipeline and can execute
46450 2 scalar instructions per tick, but has an in-order SIMD pipeline. */
46451 if (TARGET_SILVERMONT || TARGET_INTEL)
46452 if (stmt_info && stmt_info->stmt)
46454 tree lhs_op = gimple_get_lhs (stmt_info->stmt);
46455 if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
46456 retval = (retval * 17) / 10;
46459 cost[where] += retval;
46461 return retval;
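/* The 1.7 factor above is applied with integer arithmetic: e.g. a count of 3
   statements of cost 1 gives retval == 3, scaled to 3 * 17 / 10 == 5 for an
   integer-typed statement on Silvermont.  */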
46464 /* Implement targetm.vectorize.finish_cost. */
46466 static void
46467 ix86_finish_cost (void *data, unsigned *prologue_cost,
46468 unsigned *body_cost, unsigned *epilogue_cost)
46470 unsigned *cost = (unsigned *) data;
46471 *prologue_cost = cost[vect_prologue];
46472 *body_cost = cost[vect_body];
46473 *epilogue_cost = cost[vect_epilogue];
46476 /* Implement targetm.vectorize.destroy_cost_data. */
46478 static void
46479 ix86_destroy_cost_data (void *data)
46481 free (data);
46484 /* Validate target specific memory model bits in VAL. */
46486 static unsigned HOST_WIDE_INT
46487 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
46489 unsigned HOST_WIDE_INT model = val & MEMMODEL_MASK;
46490 bool strong;
46492 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
46493 |MEMMODEL_MASK)
46494 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
46496 warning (OPT_Winvalid_memory_model,
46497 "Unknown architecture specific memory model");
46498 return MEMMODEL_SEQ_CST;
46500 strong = (model == MEMMODEL_ACQ_REL || model == MEMMODEL_SEQ_CST);
46501 if (val & IX86_HLE_ACQUIRE && !(model == MEMMODEL_ACQUIRE || strong))
46503 warning (OPT_Winvalid_memory_model,
46504 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
46505 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
46507 if (val & IX86_HLE_RELEASE && !(model == MEMMODEL_RELEASE || strong))
46509 warning (OPT_Winvalid_memory_model,
46510 "HLE_RELEASE not used with RELEASE or stronger memory model");
46511 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
46513 return val;
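/* A typical well-formed use of these bits from user code (a sketch, assuming
   the __ATOMIC_HLE_ACQUIRE / __ATOMIC_HLE_RELEASE macros exposed for x86)
   might look like:

     while (__atomic_exchange_n (&lock, 1,
                                 __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE))
       ;
     ...critical section...
     __atomic_store_n (&lock, 0, __ATOMIC_RELEASE | __ATOMIC_HLE_RELEASE);

   Combining HLE_ACQUIRE with a model other than ACQUIRE, ACQ_REL or SEQ_CST
   (or HLE_RELEASE with other than RELEASE, ACQ_REL or SEQ_CST) triggers the
   warnings above and falls back to MEMMODEL_SEQ_CST plus the HLE bit.  */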
46516 /* Set CLONEI->vecsize_mangle, CLONEI->vecsize_int,
46517 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
46518 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
46519 or number of vecsize_mangle variants that should be emitted. */
46521 static int
46522 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
46523 struct cgraph_simd_clone *clonei,
46524 tree base_type, int num)
46526 int ret = 1;
46528 if (clonei->simdlen
46529 && (clonei->simdlen < 2
46530 || clonei->simdlen > 16
46531 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
46533 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
46534 "unsupported simdlen %d", clonei->simdlen);
46535 return 0;
46538 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
46539 if (TREE_CODE (ret_type) != VOID_TYPE)
46540 switch (TYPE_MODE (ret_type))
46542 case QImode:
46543 case HImode:
46544 case SImode:
46545 case DImode:
46546 case SFmode:
46547 case DFmode:
46548 /* case SCmode: */
46549 /* case DCmode: */
46550 break;
46551 default:
46552 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
46553 "unsupported return type %qT for simd\n", ret_type);
46554 return 0;
46557 tree t;
46558 int i;
46560 for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
46561 /* FIXME: Shouldn't we allow such arguments if they are uniform? */
46562 switch (TYPE_MODE (TREE_TYPE (t)))
46564 case QImode:
46565 case HImode:
46566 case SImode:
46567 case DImode:
46568 case SFmode:
46569 case DFmode:
46570 /* case SCmode: */
46571 /* case DCmode: */
46572 break;
46573 default:
46574 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
46575 "unsupported argument type %qT for simd\n", TREE_TYPE (t));
46576 return 0;
46579 if (clonei->cilk_elemental)
46581 /* Parse the processor clause here. If not present, default to 'b'. */
46582 clonei->vecsize_mangle = 'b';
46584 else if (!TREE_PUBLIC (node->decl))
46586 /* If the function isn't exported, we can pick up just one ISA
46587 for the clones. */
46588 if (TARGET_AVX2)
46589 clonei->vecsize_mangle = 'd';
46590 else if (TARGET_AVX)
46591 clonei->vecsize_mangle = 'c';
46592 else
46593 clonei->vecsize_mangle = 'b';
46594 ret = 1;
46596 else
46598 clonei->vecsize_mangle = "bcd"[num];
46599 ret = 3;
46601 switch (clonei->vecsize_mangle)
46603 case 'b':
46604 clonei->vecsize_int = 128;
46605 clonei->vecsize_float = 128;
46606 break;
46607 case 'c':
46608 clonei->vecsize_int = 128;
46609 clonei->vecsize_float = 256;
46610 break;
46611 case 'd':
46612 clonei->vecsize_int = 256;
46613 clonei->vecsize_float = 256;
46614 break;
46616 if (clonei->simdlen == 0)
46618 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
46619 clonei->simdlen = clonei->vecsize_int;
46620 else
46621 clonei->simdlen = clonei->vecsize_float;
46622 clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
46623 if (clonei->simdlen > 16)
46624 clonei->simdlen = 16;
46626 return ret;
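/* For example, with a 32-bit 'int' base type and the AVX2 variant ('d',
   vecsize_int == 256) the default simdlen becomes 256 / 32 == 8; with a
   'char' base type the raw value 256 / 8 == 32 is capped to 16.  */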
46629 /* Add target attribute to SIMD clone NODE if needed. */
46631 static void
46632 ix86_simd_clone_adjust (struct cgraph_node *node)
46634 const char *str = NULL;
46635 gcc_assert (node->decl == cfun->decl);
46636 switch (node->simdclone->vecsize_mangle)
46638 case 'b':
46639 if (!TARGET_SSE2)
46640 str = "sse2";
46641 break;
46642 case 'c':
46643 if (!TARGET_AVX)
46644 str = "avx";
46645 break;
46646 case 'd':
46647 if (!TARGET_AVX2)
46648 str = "avx2";
46649 break;
46650 default:
46651 gcc_unreachable ();
46653 if (str == NULL)
46654 return;
46655 push_cfun (NULL);
46656 tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str));
46657 bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
46658 gcc_assert (ok);
46659 pop_cfun ();
46660 ix86_previous_fndecl = NULL_TREE;
46661 ix86_set_current_function (node->decl);
46664 /* If SIMD clone NODE can't be used in a vectorized loop
46665 in the current function, return -1, otherwise return the badness of using it
46666 (0 if it is most desirable from the vecsize_mangle point of view, 1
46667 slightly less desirable, etc.). */
46669 static int
46670 ix86_simd_clone_usable (struct cgraph_node *node)
46672 switch (node->simdclone->vecsize_mangle)
46674 case 'b':
46675 if (!TARGET_SSE2)
46676 return -1;
46677 if (!TARGET_AVX)
46678 return 0;
46679 return TARGET_AVX2 ? 2 : 1;
46680 case 'c':
46681 if (!TARGET_AVX)
46682 return -1;
46683 return TARGET_AVX2 ? 1 : 0;
46684 break;
46685 case 'd':
46686 if (!TARGET_AVX2)
46687 return -1;
46688 return 0;
46689 default:
46690 gcc_unreachable ();
46694 /* A for_each_rtx callback that counts memory references: each MEM in *X
46695 adds one to *MEM_COUNT, or two if it is wider than four words. The count
46696 determines the unrolling factor for the bdver3 and bdver4 architectures. */
46698 static int
46699 ix86_loop_memcount (rtx *x, unsigned *mem_count)
46701 if (*x != NULL_RTX && MEM_P (*x))
46703 enum machine_mode mode;
46704 unsigned int n_words;
46706 mode = GET_MODE (*x);
46707 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
46709 if (n_words > 4)
46710 (*mem_count) += 2;
46711 else
46712 (*mem_count) += 1;
46714 return 0;
46717 /* This function adjusts the unroll factor based on
46718 the hardware capabilities. For example, bdver3 has
46719 a loop buffer which makes unrolling of smaller
46720 loops less important. This function decides the
46721 unroll factor using the number of memory references
46722 (a budget of 32 references is used) as a heuristic. */
46724 static unsigned
46725 ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
46727 basic_block *bbs;
46728 rtx insn;
46729 unsigned i;
46730 unsigned mem_count = 0;
46732 if (!TARGET_ADJUST_UNROLL)
46733 return nunroll;
46735 /* Count the number of memory references within the loop body. */
46736 bbs = get_loop_body (loop);
46737 for (i = 0; i < loop->num_nodes; i++)
46739 for (insn = BB_HEAD (bbs[i]); insn != BB_END (bbs[i]); insn = NEXT_INSN (insn))
46740 if (NONDEBUG_INSN_P (insn))
46741 for_each_rtx (&insn, (rtx_function) ix86_loop_memcount, &mem_count);
46743 free (bbs);
46745 if (mem_count && mem_count <= 32)
46746 return 32 / mem_count;
46748 return nunroll;
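/* For example, a loop body containing 8 counted memory references gets an
   adjusted unroll factor of 32 / 8 == 4, while a loop with more than 32
   references keeps the factor NUNROLL chosen by the generic heuristics.  */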
46752 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
46754 static bool
46755 ix86_float_exceptions_rounding_supported_p (void)
46757 /* For x87 floating point with standard excess precision handling,
46758 there is no adddf3 pattern (since x87 floating point only has
46759 XFmode operations) so the default hook implementation gets this
46760 wrong. */
46761 return TARGET_80387 || TARGET_SSE_MATH;
46764 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
46766 static void
46767 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
46769 if (!TARGET_80387 && !TARGET_SSE_MATH)
46770 return;
46771 tree exceptions_var = create_tmp_var (integer_type_node, NULL);
46772 if (TARGET_80387)
46774 tree fenv_index_type = build_index_type (size_int (6));
46775 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
46776 tree fenv_var = create_tmp_var (fenv_type, NULL);
46777 mark_addressable (fenv_var);
46778 tree fenv_ptr = build_pointer_type (fenv_type);
46779 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
46780 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
46781 tree fnstenv = ix86_builtins[IX86_BUILTIN_FNSTENV];
46782 tree fldenv = ix86_builtins[IX86_BUILTIN_FLDENV];
46783 tree fnstsw = ix86_builtins[IX86_BUILTIN_FNSTSW];
46784 tree fnclex = ix86_builtins[IX86_BUILTIN_FNCLEX];
46785 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
46786 tree hold_fnclex = build_call_expr (fnclex, 0);
46787 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_fnstenv,
46788 hold_fnclex);
46789 *clear = build_call_expr (fnclex, 0);
46790 tree sw_var = create_tmp_var (short_unsigned_type_node, NULL);
46791 mark_addressable (sw_var);
46792 tree su_ptr = build_pointer_type (short_unsigned_type_node);
46793 tree sw_addr = build1 (ADDR_EXPR, su_ptr, sw_var);
46794 tree fnstsw_call = build_call_expr (fnstsw, 1, sw_addr);
46795 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
46796 tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
46797 exceptions_var, exceptions_x87);
46798 *update = build2 (COMPOUND_EXPR, integer_type_node,
46799 fnstsw_call, update_mod);
46800 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
46801 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
46803 if (TARGET_SSE_MATH)
46805 tree mxcsr_orig_var = create_tmp_var (unsigned_type_node, NULL);
46806 tree mxcsr_mod_var = create_tmp_var (unsigned_type_node, NULL);
46807 tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR];
46808 tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR];
46809 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
46810 tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node,
46811 mxcsr_orig_var, stmxcsr_hold_call);
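/* In the saved MXCSR value, bits 7..12 are the exception mask bits and
   bits 0..5 the exception status flags; OR-ing in 0x1f80 masks all SSE
   exceptions and AND-ing with 0xffffffc0 clears any pending status flags
   for the duration of the hold sequence.  */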
46812 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
46813 mxcsr_orig_var,
46814 build_int_cst (unsigned_type_node, 0x1f80));
46815 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
46816 build_int_cst (unsigned_type_node, 0xffffffc0));
46817 tree hold_assign_mod = build2 (MODIFY_EXPR, unsigned_type_node,
46818 mxcsr_mod_var, hold_mod_val);
46819 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
46820 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
46821 hold_assign_orig, hold_assign_mod);
46822 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
46823 ldmxcsr_hold_call);
46824 if (*hold)
46825 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
46826 else
46827 *hold = hold_all;
46828 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
46829 if (*clear)
46830 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
46831 ldmxcsr_clear_call);
46832 else
46833 *clear = ldmxcsr_clear_call;
46834 tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
46835 tree exceptions_sse = fold_convert (integer_type_node,
46836 stxmcsr_update_call);
46837 if (*update)
46839 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
46840 exceptions_var, exceptions_sse);
46841 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
46842 exceptions_var, exceptions_mod);
46843 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
46844 exceptions_assign);
46846 else
46847 *update = build2 (MODIFY_EXPR, integer_type_node,
46848 exceptions_var, exceptions_sse);
46849 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
46850 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
46851 ldmxcsr_update_call);
46853 tree atomic_feraiseexcept
46854 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
46855 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
46856 1, exceptions_var);
46857 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
46858 atomic_feraiseexcept_call);
46861 /* Initialize the GCC target structure. */
46862 #undef TARGET_RETURN_IN_MEMORY
46863 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
46865 #undef TARGET_LEGITIMIZE_ADDRESS
46866 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
46868 #undef TARGET_ATTRIBUTE_TABLE
46869 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
46870 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
46871 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
46872 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
46873 # undef TARGET_MERGE_DECL_ATTRIBUTES
46874 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
46875 #endif
46877 #undef TARGET_COMP_TYPE_ATTRIBUTES
46878 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
46880 #undef TARGET_INIT_BUILTINS
46881 #define TARGET_INIT_BUILTINS ix86_init_builtins
46882 #undef TARGET_BUILTIN_DECL
46883 #define TARGET_BUILTIN_DECL ix86_builtin_decl
46884 #undef TARGET_EXPAND_BUILTIN
46885 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
46887 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
46888 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
46889 ix86_builtin_vectorized_function
46891 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
46892 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
46894 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
46895 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
46897 #undef TARGET_VECTORIZE_BUILTIN_GATHER
46898 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
46900 #undef TARGET_BUILTIN_RECIPROCAL
46901 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
46903 #undef TARGET_ASM_FUNCTION_EPILOGUE
46904 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
46906 #undef TARGET_ENCODE_SECTION_INFO
46907 #ifndef SUBTARGET_ENCODE_SECTION_INFO
46908 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
46909 #else
46910 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
46911 #endif
46913 #undef TARGET_ASM_OPEN_PAREN
46914 #define TARGET_ASM_OPEN_PAREN ""
46915 #undef TARGET_ASM_CLOSE_PAREN
46916 #define TARGET_ASM_CLOSE_PAREN ""
46918 #undef TARGET_ASM_BYTE_OP
46919 #define TARGET_ASM_BYTE_OP ASM_BYTE
46921 #undef TARGET_ASM_ALIGNED_HI_OP
46922 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
46923 #undef TARGET_ASM_ALIGNED_SI_OP
46924 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
46925 #ifdef ASM_QUAD
46926 #undef TARGET_ASM_ALIGNED_DI_OP
46927 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
46928 #endif
46930 #undef TARGET_PROFILE_BEFORE_PROLOGUE
46931 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
46933 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
46934 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
46936 #undef TARGET_ASM_UNALIGNED_HI_OP
46937 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
46938 #undef TARGET_ASM_UNALIGNED_SI_OP
46939 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
46940 #undef TARGET_ASM_UNALIGNED_DI_OP
46941 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
46943 #undef TARGET_PRINT_OPERAND
46944 #define TARGET_PRINT_OPERAND ix86_print_operand
46945 #undef TARGET_PRINT_OPERAND_ADDRESS
46946 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
46947 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
46948 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
46949 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
46950 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
46952 #undef TARGET_SCHED_INIT_GLOBAL
46953 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
46954 #undef TARGET_SCHED_ADJUST_COST
46955 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
46956 #undef TARGET_SCHED_ISSUE_RATE
46957 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
46958 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
46959 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
46960 ia32_multipass_dfa_lookahead
46961 #undef TARGET_SCHED_MACRO_FUSION_P
46962 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
46963 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
46964 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
46966 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
46967 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
46969 #undef TARGET_MEMMODEL_CHECK
46970 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
46972 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
46973 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
46975 #ifdef HAVE_AS_TLS
46976 #undef TARGET_HAVE_TLS
46977 #define TARGET_HAVE_TLS true
46978 #endif
46979 #undef TARGET_CANNOT_FORCE_CONST_MEM
46980 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
46981 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
46982 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
46984 #undef TARGET_DELEGITIMIZE_ADDRESS
46985 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
46987 #undef TARGET_MS_BITFIELD_LAYOUT_P
46988 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
46990 #if TARGET_MACHO
46991 #undef TARGET_BINDS_LOCAL_P
46992 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
46993 #endif
46994 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
46995 #undef TARGET_BINDS_LOCAL_P
46996 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
46997 #endif
46999 #undef TARGET_ASM_OUTPUT_MI_THUNK
47000 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
47001 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
47002 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
47004 #undef TARGET_ASM_FILE_START
47005 #define TARGET_ASM_FILE_START x86_file_start
47007 #undef TARGET_OPTION_OVERRIDE
47008 #define TARGET_OPTION_OVERRIDE ix86_option_override
47010 #undef TARGET_REGISTER_MOVE_COST
47011 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
47012 #undef TARGET_MEMORY_MOVE_COST
47013 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
47014 #undef TARGET_RTX_COSTS
47015 #define TARGET_RTX_COSTS ix86_rtx_costs
47016 #undef TARGET_ADDRESS_COST
47017 #define TARGET_ADDRESS_COST ix86_address_cost
47019 #undef TARGET_FIXED_CONDITION_CODE_REGS
47020 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
47021 #undef TARGET_CC_MODES_COMPATIBLE
47022 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
47024 #undef TARGET_MACHINE_DEPENDENT_REORG
47025 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
47027 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
47028 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
47030 #undef TARGET_BUILD_BUILTIN_VA_LIST
47031 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
47033 #undef TARGET_FOLD_BUILTIN
47034 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
47036 #undef TARGET_COMPARE_VERSION_PRIORITY
47037 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
47039 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
47040 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
47041 ix86_generate_version_dispatcher_body
47043 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
47044 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
47045 ix86_get_function_versions_dispatcher
47047 #undef TARGET_ENUM_VA_LIST_P
47048 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
47050 #undef TARGET_FN_ABI_VA_LIST
47051 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
47053 #undef TARGET_CANONICAL_VA_LIST_TYPE
47054 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
47056 #undef TARGET_EXPAND_BUILTIN_VA_START
47057 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
47059 #undef TARGET_MD_ASM_CLOBBERS
47060 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
47062 #undef TARGET_PROMOTE_PROTOTYPES
47063 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
47064 #undef TARGET_SETUP_INCOMING_VARARGS
47065 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
47066 #undef TARGET_MUST_PASS_IN_STACK
47067 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
47068 #undef TARGET_FUNCTION_ARG_ADVANCE
47069 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
47070 #undef TARGET_FUNCTION_ARG
47071 #define TARGET_FUNCTION_ARG ix86_function_arg
47072 #undef TARGET_FUNCTION_ARG_BOUNDARY
47073 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
47074 #undef TARGET_PASS_BY_REFERENCE
47075 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
47076 #undef TARGET_INTERNAL_ARG_POINTER
47077 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
47078 #undef TARGET_UPDATE_STACK_BOUNDARY
47079 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
47080 #undef TARGET_GET_DRAP_RTX
47081 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
47082 #undef TARGET_STRICT_ARGUMENT_NAMING
47083 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
47084 #undef TARGET_STATIC_CHAIN
47085 #define TARGET_STATIC_CHAIN ix86_static_chain
47086 #undef TARGET_TRAMPOLINE_INIT
47087 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
47088 #undef TARGET_RETURN_POPS_ARGS
47089 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
47091 #undef TARGET_LEGITIMATE_COMBINED_INSN
47092 #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
47094 #undef TARGET_ASAN_SHADOW_OFFSET
47095 #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
47097 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
47098 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
47100 #undef TARGET_SCALAR_MODE_SUPPORTED_P
47101 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
47103 #undef TARGET_VECTOR_MODE_SUPPORTED_P
47104 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
47106 #undef TARGET_C_MODE_FOR_SUFFIX
47107 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
47109 #ifdef HAVE_AS_TLS
47110 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
47111 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
47112 #endif
47114 #ifdef SUBTARGET_INSERT_ATTRIBUTES
47115 #undef TARGET_INSERT_ATTRIBUTES
47116 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
47117 #endif
47119 #undef TARGET_MANGLE_TYPE
47120 #define TARGET_MANGLE_TYPE ix86_mangle_type
47122 #if !TARGET_MACHO
47123 #undef TARGET_STACK_PROTECT_FAIL
47124 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
47125 #endif
47127 #undef TARGET_FUNCTION_VALUE
47128 #define TARGET_FUNCTION_VALUE ix86_function_value
47130 #undef TARGET_FUNCTION_VALUE_REGNO_P
47131 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
47133 #undef TARGET_PROMOTE_FUNCTION_MODE
47134 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
47136 #undef TARGET_MEMBER_TYPE_FORCES_BLK
47137 #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
47139 #undef TARGET_INSTANTIATE_DECLS
47140 #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
47142 #undef TARGET_SECONDARY_RELOAD
47143 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
47145 #undef TARGET_CLASS_MAX_NREGS
47146 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
47148 #undef TARGET_PREFERRED_RELOAD_CLASS
47149 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
47150 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
47151 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
47152 #undef TARGET_CLASS_LIKELY_SPILLED_P
47153 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
47155 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
47156 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
47157 ix86_builtin_vectorization_cost
47158 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
47159 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
47160 ix86_vectorize_vec_perm_const_ok
47161 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
47162 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
47163 ix86_preferred_simd_mode
47164 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
47165 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
47166 ix86_autovectorize_vector_sizes
47167 #undef TARGET_VECTORIZE_INIT_COST
47168 #define TARGET_VECTORIZE_INIT_COST ix86_init_cost
47169 #undef TARGET_VECTORIZE_ADD_STMT_COST
47170 #define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
47171 #undef TARGET_VECTORIZE_FINISH_COST
47172 #define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
47173 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
47174 #define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
47176 #undef TARGET_SET_CURRENT_FUNCTION
47177 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
47179 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
47180 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
47182 #undef TARGET_OPTION_SAVE
47183 #define TARGET_OPTION_SAVE ix86_function_specific_save
47185 #undef TARGET_OPTION_RESTORE
47186 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
47188 #undef TARGET_OPTION_PRINT
47189 #define TARGET_OPTION_PRINT ix86_function_specific_print
47191 #undef TARGET_OPTION_FUNCTION_VERSIONS
47192 #define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions
47194 #undef TARGET_CAN_INLINE_P
47195 #define TARGET_CAN_INLINE_P ix86_can_inline_p
47197 #undef TARGET_EXPAND_TO_RTL_HOOK
47198 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
47200 #undef TARGET_LEGITIMATE_ADDRESS_P
47201 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
47203 #undef TARGET_LRA_P
47204 #define TARGET_LRA_P hook_bool_void_true
47206 #undef TARGET_REGISTER_PRIORITY
47207 #define TARGET_REGISTER_PRIORITY ix86_register_priority
47209 #undef TARGET_REGISTER_USAGE_LEVELING_P
47210 #define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
47212 #undef TARGET_LEGITIMATE_CONSTANT_P
47213 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
47215 #undef TARGET_FRAME_POINTER_REQUIRED
47216 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
47218 #undef TARGET_CAN_ELIMINATE
47219 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
47221 #undef TARGET_EXTRA_LIVE_ON_ENTRY
47222 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
47224 #undef TARGET_ASM_CODE_END
47225 #define TARGET_ASM_CODE_END ix86_code_end
47227 #undef TARGET_CONDITIONAL_REGISTER_USAGE
47228 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
47230 #if TARGET_MACHO
47231 #undef TARGET_INIT_LIBFUNCS
47232 #define TARGET_INIT_LIBFUNCS darwin_rename_builtins
47233 #endif
47235 #undef TARGET_LOOP_UNROLL_ADJUST
47236 #define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
47238 #undef TARGET_SPILL_CLASS
47239 #define TARGET_SPILL_CLASS ix86_spill_class
47241 #undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
47242 #define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
47243 ix86_simd_clone_compute_vecsize_and_simdlen
47245 #undef TARGET_SIMD_CLONE_ADJUST
47246 #define TARGET_SIMD_CLONE_ADJUST \
47247 ix86_simd_clone_adjust
47249 #undef TARGET_SIMD_CLONE_USABLE
47250 #define TARGET_SIMD_CLONE_USABLE \
47251 ix86_simd_clone_usable
47253 #undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
47254 #define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
47255 ix86_float_exceptions_rounding_supported_p
47257 #undef TARGET_MODE_EMIT
47258 #define TARGET_MODE_EMIT ix86_emit_mode_set
47260 #undef TARGET_MODE_NEEDED
47261 #define TARGET_MODE_NEEDED ix86_mode_needed
47263 #undef TARGET_MODE_AFTER
47264 #define TARGET_MODE_AFTER ix86_mode_after
47266 #undef TARGET_MODE_ENTRY
47267 #define TARGET_MODE_ENTRY ix86_mode_entry
47269 #undef TARGET_MODE_EXIT
47270 #define TARGET_MODE_EXIT ix86_mode_exit
47272 #undef TARGET_MODE_PRIORITY
47273 #define TARGET_MODE_PRIORITY ix86_mode_priority
47275 struct gcc_target targetm = TARGET_INITIALIZER;
47277 #include "gt-i386.h"