/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988-2014 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "stringpool.h"
#include "attribs.h"
#include "calls.h"
#include "stor-layout.h"
#include "varasm.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "diagnostic-core.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"
#include "target.h"
#include "target-def.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "reload.h"
#include "cgraph.h"
#include "hash-table.h"
#include "vec.h"
#include "basic-block.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "cfgloop.h"
#include "dwarf2.h"
#include "df.h"
#include "tm-constrs.h"
#include "params.h"
#include "cselib.h"
#include "debug.h"
#include "sched-int.h"
#include "sbitmap.h"
#include "fibheap.h"
#include "opts.h"
#include "diagnostic.h"
#include "dumpfile.h"
#include "tree-pass.h"
#include "wide-int.h"
#include "context.h"
#include "pass_manager.h"
#include "target-globals.h"
#include "tree-vectorizer.h"
#include "shrink-wrap.h"
#include "builtins.h"
static rtx legitimize_dllimport_symbol (rtx, bool);
static rtx legitimize_pe_coff_extern_decl (rtx, bool);
static rtx legitimize_pe_coff_symbol (rtx, bool);

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode)			\
  ((mode) == QImode ? 0				\
   : (mode) == HImode ? 1			\
   : (mode) == SImode ? 2			\
   : (mode) == DImode ? 3			\
   : 4)
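
/* Illustrative note (not original source text): the per-mode cost arrays in
   the processor_costs tables below are indexed with MODE_INDEX, e.g. a lookup
   along the lines of ix86_cost->mult_init[MODE_INDEX (SImode)] selects the
   SImode entry of the "cost of starting multiply" array, and index 4
   ("other") catches any remaining mode.  */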
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)
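
/* Worked example of the size-cost convention above: with COSTS_N_INSNS (N)
   expanding to (N) * 4, a 2-byte addition gets COSTS_N_BYTES (2) == 4, the
   same weight as COSTS_N_INSNS (1), so the size-tuning table below remains
   comparable with the speed-tuning tables that follow.  */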
#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
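
/* Explanatory note (not original source text): each *_memcpy[2] and
   *_memset[2] table below holds one stringop_algs entry per code model;
   the second entry appears to be the one consulted for 64-bit code.  CPUs
   that never run 64-bit code simply reuse DUMMY_STRINGOP_ALGS, which always
   falls back to a library call.  */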
static stringop_algs ix86_size_memcpy[2] = {
  {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
  {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
static stringop_algs ix86_size_memset[2] = {
  {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
  {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};

const
struct processor_costs ix86_size_cost = {/* costs for tuning for size */
  COSTS_N_BYTES (2),   /* cost of an add instruction */
  COSTS_N_BYTES (3),   /* cost of a lea instruction */
  COSTS_N_BYTES (2),   /* variable shift costs */
  COSTS_N_BYTES (3),   /* constant shift costs */
  {COSTS_N_BYTES (3),  /* cost of starting multiply for QI */
   COSTS_N_BYTES (3),  /* HI */
   COSTS_N_BYTES (3),  /* SI */
   COSTS_N_BYTES (3),  /* DI */
   COSTS_N_BYTES (5)}, /* other */
  0,                   /* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),  /* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),  /* HI */
   COSTS_N_BYTES (3),  /* SI */
   COSTS_N_BYTES (3),  /* DI */
   COSTS_N_BYTES (5)}, /* other */
  COSTS_N_BYTES (3),   /* cost of movsx */
  COSTS_N_BYTES (3),   /* cost of movzx */
  0,                   /* "large" insn */
  2,                   /* MOVE_RATIO */
  2,                   /* cost for loading QImode using movzbl */
  {2, 2, 2},           /* cost of loading integer registers
                          in QImode, HImode and SImode.
                          Relative to reg-reg move (2).  */
  {2, 2, 2},           /* cost of storing integer registers */
  2,                   /* cost of reg,reg fld/fst */
  {2, 2, 2},           /* cost of loading fp registers
                          in SFmode, DFmode and XFmode */
  {2, 2, 2},           /* cost of storing fp registers
                          in SFmode, DFmode and XFmode */
  3,                   /* cost of moving MMX register */
  {3, 3},              /* cost of loading MMX registers
                          in SImode and DImode */
  {3, 3},              /* cost of storing MMX registers
                          in SImode and DImode */
  3,                   /* cost of moving SSE register */
  {3, 3, 3},           /* cost of loading SSE registers
                          in SImode, DImode and TImode */
  {3, 3, 3},           /* cost of storing SSE registers
                          in SImode, DImode and TImode */
  3,                   /* MMX or SSE register to integer */
  0,                   /* size of l1 cache */
  0,                   /* size of l2 cache */
  0,                   /* size of prefetch block */
  0,                   /* number of parallel prefetches */
  2,                   /* Branch cost */
  COSTS_N_BYTES (2),   /* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2),   /* cost of FMUL instruction.  */
  COSTS_N_BYTES (2),   /* cost of FDIV instruction.  */
  COSTS_N_BYTES (2),   /* cost of FABS instruction.  */
  COSTS_N_BYTES (2),   /* cost of FCHS instruction.  */
  COSTS_N_BYTES (2),   /* cost of FSQRT instruction.  */
  ix86_size_memcpy,
  ix86_size_memset,
  1,                   /* scalar_stmt_cost.  */
  1,                   /* scalar load_cost.  */
  1,                   /* scalar_store_cost.  */
  1,                   /* vec_stmt_cost.  */
  1,                   /* vec_to_scalar_cost.  */
  1,                   /* scalar_to_vec_cost.  */
  1,                   /* vec_align_load_cost.  */
  1,                   /* vec_unalign_load_cost.  */
  1,                   /* vec_store_cost.  */
  1,                   /* cond_taken_branch_cost.  */
  1,                   /* cond_not_taken_branch_cost.  */
};
/* Processor costs (relative to an add) */
static stringop_algs i386_memcpy[2] = {
  {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs i386_memset[2] = {
  {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
  DUMMY_STRINGOP_ALGS};

static const
struct processor_costs i386_cost = {	/* 386 specific costs */
  COSTS_N_INSNS (1),   /* cost of an add instruction */
  COSTS_N_INSNS (1),   /* cost of a lea instruction */
  COSTS_N_INSNS (3),   /* variable shift costs */
  COSTS_N_INSNS (2),   /* constant shift costs */
  {COSTS_N_INSNS (6),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (6),  /* HI */
   COSTS_N_INSNS (6),  /* SI */
   COSTS_N_INSNS (6),  /* DI */
   COSTS_N_INSNS (6)}, /* other */
  COSTS_N_INSNS (1),   /* cost of multiply per each bit set */
  {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23), /* HI */
   COSTS_N_INSNS (23), /* SI */
   COSTS_N_INSNS (23), /* DI */
   COSTS_N_INSNS (23)},/* other */
  COSTS_N_INSNS (3),   /* cost of movsx */
  COSTS_N_INSNS (2),   /* cost of movzx */
  15,                  /* "large" insn */
  3,                   /* MOVE_RATIO */
  4,                   /* cost for loading QImode using movzbl */
  {2, 4, 2},           /* cost of loading integer registers
                          in QImode, HImode and SImode.
                          Relative to reg-reg move (2).  */
  {2, 4, 2},           /* cost of storing integer registers */
  2,                   /* cost of reg,reg fld/fst */
  {8, 8, 8},           /* cost of loading fp registers
                          in SFmode, DFmode and XFmode */
  {8, 8, 8},           /* cost of storing fp registers
                          in SFmode, DFmode and XFmode */
  2,                   /* cost of moving MMX register */
  {4, 8},              /* cost of loading MMX registers
                          in SImode and DImode */
  {4, 8},              /* cost of storing MMX registers
                          in SImode and DImode */
  2,                   /* cost of moving SSE register */
  {4, 8, 16},          /* cost of loading SSE registers
                          in SImode, DImode and TImode */
  {4, 8, 16},          /* cost of storing SSE registers
                          in SImode, DImode and TImode */
  3,                   /* MMX or SSE register to integer */
  0,                   /* size of l1 cache */
  0,                   /* size of l2 cache */
  0,                   /* size of prefetch block */
  0,                   /* number of parallel prefetches */
  1,                   /* Branch cost */
  COSTS_N_INSNS (23),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (88),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (22),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (24),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (122), /* cost of FSQRT instruction.  */
  i386_memcpy,
  i386_memset,
  1,                   /* scalar_stmt_cost.  */
  1,                   /* scalar load_cost.  */
  1,                   /* scalar_store_cost.  */
  1,                   /* vec_stmt_cost.  */
  1,                   /* vec_to_scalar_cost.  */
  1,                   /* scalar_to_vec_cost.  */
  1,                   /* vec_align_load_cost.  */
  2,                   /* vec_unalign_load_cost.  */
  1,                   /* vec_store_cost.  */
  3,                   /* cond_taken_branch_cost.  */
  1,                   /* cond_not_taken_branch_cost.  */
};
static stringop_algs i486_memcpy[2] = {
  {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs i486_memset[2] = {
  {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
  DUMMY_STRINGOP_ALGS};

static const
struct processor_costs i486_cost = {	/* 486 specific costs */
  COSTS_N_INSNS (1),   /* cost of an add instruction */
  COSTS_N_INSNS (1),   /* cost of a lea instruction */
  COSTS_N_INSNS (3),   /* variable shift costs */
  COSTS_N_INSNS (2),   /* constant shift costs */
  {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
   COSTS_N_INSNS (12), /* HI */
   COSTS_N_INSNS (12), /* SI */
   COSTS_N_INSNS (12), /* DI */
   COSTS_N_INSNS (12)},/* other */
  1,                   /* cost of multiply per each bit set */
  {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (40), /* HI */
   COSTS_N_INSNS (40), /* SI */
   COSTS_N_INSNS (40), /* DI */
   COSTS_N_INSNS (40)},/* other */
  COSTS_N_INSNS (3),   /* cost of movsx */
  COSTS_N_INSNS (2),   /* cost of movzx */
  15,                  /* "large" insn */
  3,                   /* MOVE_RATIO */
  4,                   /* cost for loading QImode using movzbl */
  {2, 4, 2},           /* cost of loading integer registers
                          in QImode, HImode and SImode.
                          Relative to reg-reg move (2).  */
  {2, 4, 2},           /* cost of storing integer registers */
  2,                   /* cost of reg,reg fld/fst */
  {8, 8, 8},           /* cost of loading fp registers
                          in SFmode, DFmode and XFmode */
  {8, 8, 8},           /* cost of storing fp registers
                          in SFmode, DFmode and XFmode */
  2,                   /* cost of moving MMX register */
  {4, 8},              /* cost of loading MMX registers
                          in SImode and DImode */
  {4, 8},              /* cost of storing MMX registers
                          in SImode and DImode */
  2,                   /* cost of moving SSE register */
  {4, 8, 16},          /* cost of loading SSE registers
                          in SImode, DImode and TImode */
  {4, 8, 16},          /* cost of storing SSE registers
                          in SImode, DImode and TImode */
  3,                   /* MMX or SSE register to integer */
  4,                   /* size of l1 cache.  486 has 8kB cache
                          shared for code and data, so 4kB is
                          not really precise.  */
  4,                   /* size of l2 cache */
  0,                   /* size of prefetch block */
  0,                   /* number of parallel prefetches */
  1,                   /* Branch cost */
  COSTS_N_INSNS (8),   /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (73),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),   /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),   /* cost of FCHS instruction.  */
  COSTS_N_INSNS (83),  /* cost of FSQRT instruction.  */
  i486_memcpy,
  i486_memset,
  1,                   /* scalar_stmt_cost.  */
  1,                   /* scalar load_cost.  */
  1,                   /* scalar_store_cost.  */
  1,                   /* vec_stmt_cost.  */
  1,                   /* vec_to_scalar_cost.  */
  1,                   /* scalar_to_vec_cost.  */
  1,                   /* vec_align_load_cost.  */
  2,                   /* vec_unalign_load_cost.  */
  1,                   /* vec_store_cost.  */
  3,                   /* cond_taken_branch_cost.  */
  1,                   /* cond_not_taken_branch_cost.  */
};
static stringop_algs pentium_memcpy[2] = {
  {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs pentium_memset[2] = {
  {libcall, {{-1, rep_prefix_4_byte, false}}},
  DUMMY_STRINGOP_ALGS};

static const
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),   /* cost of an add instruction */
  COSTS_N_INSNS (1),   /* cost of a lea instruction */
  COSTS_N_INSNS (4),   /* variable shift costs */
  COSTS_N_INSNS (1),   /* constant shift costs */
  {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
   COSTS_N_INSNS (11), /* HI */
   COSTS_N_INSNS (11), /* SI */
   COSTS_N_INSNS (11), /* DI */
   COSTS_N_INSNS (11)},/* other */
  0,                   /* cost of multiply per each bit set */
  {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (25), /* HI */
   COSTS_N_INSNS (25), /* SI */
   COSTS_N_INSNS (25), /* DI */
   COSTS_N_INSNS (25)},/* other */
  COSTS_N_INSNS (3),   /* cost of movsx */
  COSTS_N_INSNS (2),   /* cost of movzx */
  8,                   /* "large" insn */
  6,                   /* MOVE_RATIO */
  6,                   /* cost for loading QImode using movzbl */
  {2, 4, 2},           /* cost of loading integer registers
                          in QImode, HImode and SImode.
                          Relative to reg-reg move (2).  */
  {2, 4, 2},           /* cost of storing integer registers */
  2,                   /* cost of reg,reg fld/fst */
  {2, 2, 6},           /* cost of loading fp registers
                          in SFmode, DFmode and XFmode */
  {4, 4, 6},           /* cost of storing fp registers
                          in SFmode, DFmode and XFmode */
  8,                   /* cost of moving MMX register */
  {8, 8},              /* cost of loading MMX registers
                          in SImode and DImode */
  {8, 8},              /* cost of storing MMX registers
                          in SImode and DImode */
  2,                   /* cost of moving SSE register */
  {4, 8, 16},          /* cost of loading SSE registers
                          in SImode, DImode and TImode */
  {4, 8, 16},          /* cost of storing SSE registers
                          in SImode, DImode and TImode */
  3,                   /* MMX or SSE register to integer */
  8,                   /* size of l1 cache.  */
  8,                   /* size of l2 cache */
  0,                   /* size of prefetch block */
  0,                   /* number of parallel prefetches */
  2,                   /* Branch cost */
  COSTS_N_INSNS (3),   /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3),   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (39),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),   /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),   /* cost of FCHS instruction.  */
  COSTS_N_INSNS (70),  /* cost of FSQRT instruction.  */
  pentium_memcpy,
  pentium_memset,
  1,                   /* scalar_stmt_cost.  */
  1,                   /* scalar load_cost.  */
  1,                   /* scalar_store_cost.  */
  1,                   /* vec_stmt_cost.  */
  1,                   /* vec_to_scalar_cost.  */
  1,                   /* scalar_to_vec_cost.  */
  1,                   /* vec_align_load_cost.  */
  2,                   /* vec_unalign_load_cost.  */
  1,                   /* vec_store_cost.  */
  3,                   /* cond_taken_branch_cost.  */
  1,                   /* cond_not_taken_branch_cost.  */
};
/* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
   (we ensure the alignment).  For small blocks an inline loop is still a
   noticeable win; for bigger blocks either rep movsl or rep movsb is the
   way to go.  Rep movsb apparently has a more expensive startup time in the
   CPU, but after 4K the difference is down in the noise.  */
static stringop_algs pentiumpro_memcpy[2] = {
  {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
                       {8192, rep_prefix_4_byte, false},
                       {-1, rep_prefix_1_byte, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs pentiumpro_memset[2] = {
  {rep_prefix_4_byte, {{1024, unrolled_loop, false},
                       {8192, rep_prefix_4_byte, false},
                       {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
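/* How to read these stringop tables (an explanatory note, not original
   source text): the leading algorithm in each stringop_algs entry is used
   when the block size is unknown at compile time; each {max, alg, noalign}
   triple then selects ALG for known sizes up to MAX bytes, with max == -1
   marking the final, unbounded range.  So pentiumpro_memcpy uses a simple
   loop up to 128 bytes, an unrolled loop up to 1024 bytes, rep movsl up to
   8192 bytes, and rep movsb for anything larger.  */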
static const
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),   /* cost of an add instruction */
  COSTS_N_INSNS (1),   /* cost of a lea instruction */
  COSTS_N_INSNS (1),   /* variable shift costs */
  COSTS_N_INSNS (1),   /* constant shift costs */
  {COSTS_N_INSNS (4),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (4),  /* SI */
   COSTS_N_INSNS (4),  /* DI */
   COSTS_N_INSNS (4)}, /* other */
  0,                   /* cost of multiply per each bit set */
  {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (17), /* HI */
   COSTS_N_INSNS (17), /* SI */
   COSTS_N_INSNS (17), /* DI */
   COSTS_N_INSNS (17)},/* other */
  COSTS_N_INSNS (1),   /* cost of movsx */
  COSTS_N_INSNS (1),   /* cost of movzx */
  8,                   /* "large" insn */
  6,                   /* MOVE_RATIO */
  2,                   /* cost for loading QImode using movzbl */
  {4, 4, 4},           /* cost of loading integer registers
                          in QImode, HImode and SImode.
                          Relative to reg-reg move (2).  */
  {2, 2, 2},           /* cost of storing integer registers */
  2,                   /* cost of reg,reg fld/fst */
  {2, 2, 6},           /* cost of loading fp registers
                          in SFmode, DFmode and XFmode */
  {4, 4, 6},           /* cost of storing fp registers
                          in SFmode, DFmode and XFmode */
  2,                   /* cost of moving MMX register */
  {2, 2},              /* cost of loading MMX registers
                          in SImode and DImode */
  {2, 2},              /* cost of storing MMX registers
                          in SImode and DImode */
  2,                   /* cost of moving SSE register */
  {2, 2, 8},           /* cost of loading SSE registers
                          in SImode, DImode and TImode */
  {2, 2, 8},           /* cost of storing SSE registers
                          in SImode, DImode and TImode */
  3,                   /* MMX or SSE register to integer */
  8,                   /* size of l1 cache.  */
  256,                 /* size of l2 cache */
  32,                  /* size of prefetch block */
  6,                   /* number of parallel prefetches */
  2,                   /* Branch cost */
  COSTS_N_INSNS (3),   /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),   /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),   /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),  /* cost of FSQRT instruction.  */
  pentiumpro_memcpy,
  pentiumpro_memset,
  1,                   /* scalar_stmt_cost.  */
  1,                   /* scalar load_cost.  */
  1,                   /* scalar_store_cost.  */
  1,                   /* vec_stmt_cost.  */
  1,                   /* vec_to_scalar_cost.  */
  1,                   /* scalar_to_vec_cost.  */
  1,                   /* vec_align_load_cost.  */
  2,                   /* vec_unalign_load_cost.  */
  1,                   /* vec_store_cost.  */
  3,                   /* cond_taken_branch_cost.  */
  1,                   /* cond_not_taken_branch_cost.  */
};
static stringop_algs geode_memcpy[2] = {
  {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs geode_memset[2] = {
  {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
static const
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),   /* cost of an add instruction */
  COSTS_N_INSNS (1),   /* cost of a lea instruction */
  COSTS_N_INSNS (2),   /* variable shift costs */
  COSTS_N_INSNS (1),   /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (7),  /* SI */
   COSTS_N_INSNS (7),  /* DI */
   COSTS_N_INSNS (7)}, /* other */
  0,                   /* cost of multiply per each bit set */
  {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23), /* HI */
   COSTS_N_INSNS (39), /* SI */
   COSTS_N_INSNS (39), /* DI */
   COSTS_N_INSNS (39)},/* other */
  COSTS_N_INSNS (1),   /* cost of movsx */
  COSTS_N_INSNS (1),   /* cost of movzx */
  8,                   /* "large" insn */
  4,                   /* MOVE_RATIO */
  1,                   /* cost for loading QImode using movzbl */
  {1, 1, 1},           /* cost of loading integer registers
                          in QImode, HImode and SImode.
                          Relative to reg-reg move (2).  */
  {1, 1, 1},           /* cost of storing integer registers */
  1,                   /* cost of reg,reg fld/fst */
  {1, 1, 1},           /* cost of loading fp registers
                          in SFmode, DFmode and XFmode */
  {4, 6, 6},           /* cost of storing fp registers
                          in SFmode, DFmode and XFmode */

  1,                   /* cost of moving MMX register */
  {1, 1},              /* cost of loading MMX registers
                          in SImode and DImode */
  {1, 1},              /* cost of storing MMX registers
                          in SImode and DImode */
  1,                   /* cost of moving SSE register */
  {1, 1, 1},           /* cost of loading SSE registers
                          in SImode, DImode and TImode */
  {1, 1, 1},           /* cost of storing SSE registers
                          in SImode, DImode and TImode */
  1,                   /* MMX or SSE register to integer */
  64,                  /* size of l1 cache.  */
  128,                 /* size of l2 cache.  */
  32,                  /* size of prefetch block */
  1,                   /* number of parallel prefetches */
  1,                   /* Branch cost */
  COSTS_N_INSNS (6),   /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (11),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (47),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),   /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),   /* cost of FCHS instruction.  */
  COSTS_N_INSNS (54),  /* cost of FSQRT instruction.  */
  geode_memcpy,
  geode_memset,
  1,                   /* scalar_stmt_cost.  */
  1,                   /* scalar load_cost.  */
  1,                   /* scalar_store_cost.  */
  1,                   /* vec_stmt_cost.  */
  1,                   /* vec_to_scalar_cost.  */
  1,                   /* scalar_to_vec_cost.  */
  1,                   /* vec_align_load_cost.  */
  2,                   /* vec_unalign_load_cost.  */
  1,                   /* vec_store_cost.  */
  3,                   /* cond_taken_branch_cost.  */
  1,                   /* cond_not_taken_branch_cost.  */
};
static stringop_algs k6_memcpy[2] = {
  {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs k6_memset[2] = {
  {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
static const
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),   /* cost of an add instruction */
  COSTS_N_INSNS (2),   /* cost of a lea instruction */
  COSTS_N_INSNS (1),   /* variable shift costs */
  COSTS_N_INSNS (1),   /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (3),  /* HI */
   COSTS_N_INSNS (3),  /* SI */
   COSTS_N_INSNS (3),  /* DI */
   COSTS_N_INSNS (3)}, /* other */
  0,                   /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (18), /* HI */
   COSTS_N_INSNS (18), /* SI */
   COSTS_N_INSNS (18), /* DI */
   COSTS_N_INSNS (18)},/* other */
  COSTS_N_INSNS (2),   /* cost of movsx */
  COSTS_N_INSNS (2),   /* cost of movzx */
  8,                   /* "large" insn */
  4,                   /* MOVE_RATIO */
  3,                   /* cost for loading QImode using movzbl */
  {4, 5, 4},           /* cost of loading integer registers
                          in QImode, HImode and SImode.
                          Relative to reg-reg move (2).  */
  {2, 3, 2},           /* cost of storing integer registers */
  4,                   /* cost of reg,reg fld/fst */
  {6, 6, 6},           /* cost of loading fp registers
                          in SFmode, DFmode and XFmode */
  {4, 4, 4},           /* cost of storing fp registers
                          in SFmode, DFmode and XFmode */
  2,                   /* cost of moving MMX register */
  {2, 2},              /* cost of loading MMX registers
                          in SImode and DImode */
  {2, 2},              /* cost of storing MMX registers
                          in SImode and DImode */
  2,                   /* cost of moving SSE register */
  {2, 2, 8},           /* cost of loading SSE registers
                          in SImode, DImode and TImode */
  {2, 2, 8},           /* cost of storing SSE registers
                          in SImode, DImode and TImode */
  6,                   /* MMX or SSE register to integer */
  32,                  /* size of l1 cache.  */
  32,                  /* size of l2 cache.  Some models
                          have integrated l2 cache, but
                          optimizing for k6 is not important
                          enough to worry about that.  */
  32,                  /* size of prefetch block */
  1,                   /* number of parallel prefetches */
  1,                   /* Branch cost */
  COSTS_N_INSNS (2),   /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2),   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),   /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),   /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),  /* cost of FSQRT instruction.  */
  k6_memcpy,
  k6_memset,
  1,                   /* scalar_stmt_cost.  */
  1,                   /* scalar load_cost.  */
  1,                   /* scalar_store_cost.  */
  1,                   /* vec_stmt_cost.  */
  1,                   /* vec_to_scalar_cost.  */
  1,                   /* scalar_to_vec_cost.  */
  1,                   /* vec_align_load_cost.  */
  2,                   /* vec_unalign_load_cost.  */
  1,                   /* vec_store_cost.  */
  3,                   /* cond_taken_branch_cost.  */
  1,                   /* cond_not_taken_branch_cost.  */
};
/* For some reason, Athlon deals better with REP prefix (relative to loops)
   compared to K8.  Alignment becomes important after 8 bytes for memcpy and
   128 bytes for memset.  */
static stringop_algs athlon_memcpy[2] = {
  {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs athlon_memset[2] = {
  {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
static const
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),   /* cost of an add instruction */
  COSTS_N_INSNS (2),   /* cost of a lea instruction */
  COSTS_N_INSNS (1),   /* variable shift costs */
  COSTS_N_INSNS (1),   /* constant shift costs */
  {COSTS_N_INSNS (5),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (5),  /* HI */
   COSTS_N_INSNS (5),  /* SI */
   COSTS_N_INSNS (5),  /* DI */
   COSTS_N_INSNS (5)}, /* other */
  0,                   /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26), /* HI */
   COSTS_N_INSNS (42), /* SI */
   COSTS_N_INSNS (74), /* DI */
   COSTS_N_INSNS (74)},/* other */
  COSTS_N_INSNS (1),   /* cost of movsx */
  COSTS_N_INSNS (1),   /* cost of movzx */
  8,                   /* "large" insn */
  9,                   /* MOVE_RATIO */
  4,                   /* cost for loading QImode using movzbl */
  {3, 4, 3},           /* cost of loading integer registers
                          in QImode, HImode and SImode.
                          Relative to reg-reg move (2).  */
  {3, 4, 3},           /* cost of storing integer registers */
  4,                   /* cost of reg,reg fld/fst */
  {4, 4, 12},          /* cost of loading fp registers
                          in SFmode, DFmode and XFmode */
  {6, 6, 8},           /* cost of storing fp registers
                          in SFmode, DFmode and XFmode */
  2,                   /* cost of moving MMX register */
  {4, 4},              /* cost of loading MMX registers
                          in SImode and DImode */
  {4, 4},              /* cost of storing MMX registers
                          in SImode and DImode */
  2,                   /* cost of moving SSE register */
  {4, 4, 6},           /* cost of loading SSE registers
                          in SImode, DImode and TImode */
  {4, 4, 5},           /* cost of storing SSE registers
                          in SImode, DImode and TImode */
  5,                   /* MMX or SSE register to integer */
  64,                  /* size of l1 cache.  */
  256,                 /* size of l2 cache.  */
  64,                  /* size of prefetch block */
  6,                   /* number of parallel prefetches */
  5,                   /* Branch cost */
  COSTS_N_INSNS (4),   /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (24),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),   /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),   /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),  /* cost of FSQRT instruction.  */
  athlon_memcpy,
  athlon_memset,
  1,                   /* scalar_stmt_cost.  */
  1,                   /* scalar load_cost.  */
  1,                   /* scalar_store_cost.  */
  1,                   /* vec_stmt_cost.  */
  1,                   /* vec_to_scalar_cost.  */
  1,                   /* scalar_to_vec_cost.  */
  1,                   /* vec_align_load_cost.  */
  2,                   /* vec_unalign_load_cost.  */
  1,                   /* vec_store_cost.  */
  3,                   /* cond_taken_branch_cost.  */
  1,                   /* cond_not_taken_branch_cost.  */
};
/* K8 has optimized REP instruction for medium sized blocks, but for very
   small blocks it is better to use a loop.  For large blocks, libcall can
   do nontemporal accesses and beat inline considerably.  */
static stringop_algs k8_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs k8_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false},
             {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
static const
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),   /* cost of an add instruction */
  COSTS_N_INSNS (2),   /* cost of a lea instruction */
  COSTS_N_INSNS (1),   /* variable shift costs */
  COSTS_N_INSNS (1),   /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (3),  /* SI */
   COSTS_N_INSNS (4),  /* DI */
   COSTS_N_INSNS (5)}, /* other */
  0,                   /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26), /* HI */
   COSTS_N_INSNS (42), /* SI */
   COSTS_N_INSNS (74), /* DI */
   COSTS_N_INSNS (74)},/* other */
  COSTS_N_INSNS (1),   /* cost of movsx */
  COSTS_N_INSNS (1),   /* cost of movzx */
  8,                   /* "large" insn */
  9,                   /* MOVE_RATIO */
  4,                   /* cost for loading QImode using movzbl */
  {3, 4, 3},           /* cost of loading integer registers
                          in QImode, HImode and SImode.
                          Relative to reg-reg move (2).  */
  {3, 4, 3},           /* cost of storing integer registers */
  4,                   /* cost of reg,reg fld/fst */
  {4, 4, 12},          /* cost of loading fp registers
                          in SFmode, DFmode and XFmode */
  {6, 6, 8},           /* cost of storing fp registers
                          in SFmode, DFmode and XFmode */
  2,                   /* cost of moving MMX register */
  {3, 3},              /* cost of loading MMX registers
                          in SImode and DImode */
  {4, 4},              /* cost of storing MMX registers
                          in SImode and DImode */
  2,                   /* cost of moving SSE register */
  {4, 3, 6},           /* cost of loading SSE registers
                          in SImode, DImode and TImode */
  {4, 4, 5},           /* cost of storing SSE registers
                          in SImode, DImode and TImode */
  5,                   /* MMX or SSE register to integer */
  64,                  /* size of l1 cache.  */
  512,                 /* size of l2 cache.  */
  64,                  /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                 /* number of parallel prefetches */
  3,                   /* Branch cost */
  COSTS_N_INSNS (4),   /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),   /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),   /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),  /* cost of FSQRT instruction.  */

  k8_memcpy,
  k8_memset,
  4,                   /* scalar_stmt_cost.  */
  2,                   /* scalar load_cost.  */
  2,                   /* scalar_store_cost.  */
  5,                   /* vec_stmt_cost.  */
  0,                   /* vec_to_scalar_cost.  */
  2,                   /* scalar_to_vec_cost.  */
  2,                   /* vec_align_load_cost.  */
  3,                   /* vec_unalign_load_cost.  */
  3,                   /* vec_store_cost.  */
  3,                   /* cond_taken_branch_cost.  */
  2,                   /* cond_not_taken_branch_cost.  */
};
/* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
   very small blocks it is better to use a loop.  For large blocks, libcall
   can do nontemporal accesses and beat inline considerably.  */
static stringop_algs amdfam10_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs amdfam10_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1),   /* cost of an add instruction */
  COSTS_N_INSNS (2),   /* cost of a lea instruction */
  COSTS_N_INSNS (1),   /* variable shift costs */
  COSTS_N_INSNS (1),   /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (3),  /* SI */
   COSTS_N_INSNS (4),  /* DI */
   COSTS_N_INSNS (5)}, /* other */
  0,                   /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35), /* HI */
   COSTS_N_INSNS (51), /* SI */
   COSTS_N_INSNS (83), /* DI */
   COSTS_N_INSNS (83)},/* other */
  COSTS_N_INSNS (1),   /* cost of movsx */
  COSTS_N_INSNS (1),   /* cost of movzx */
  8,                   /* "large" insn */
  9,                   /* MOVE_RATIO */
  4,                   /* cost for loading QImode using movzbl */
  {3, 4, 3},           /* cost of loading integer registers
                          in QImode, HImode and SImode.
                          Relative to reg-reg move (2).  */
  {3, 4, 3},           /* cost of storing integer registers */
  4,                   /* cost of reg,reg fld/fst */
  {4, 4, 12},          /* cost of loading fp registers
                          in SFmode, DFmode and XFmode */
  {6, 6, 8},           /* cost of storing fp registers
                          in SFmode, DFmode and XFmode */
  2,                   /* cost of moving MMX register */
  {3, 3},              /* cost of loading MMX registers
                          in SImode and DImode */
  {4, 4},              /* cost of storing MMX registers
                          in SImode and DImode */
  2,                   /* cost of moving SSE register */
  {4, 4, 3},           /* cost of loading SSE registers
                          in SImode, DImode and TImode */
  {4, 4, 5},           /* cost of storing SSE registers
                          in SImode, DImode and TImode */
  3,                   /* MMX or SSE register to integer */
  /* On K8:
      MOVD reg64, xmmreg Double FSTORE 4
      MOVD reg32, xmmreg Double FSTORE 4
     On AMDFAM10:
      MOVD reg64, xmmreg Double FADD 3
                                 1/1  1/1
      MOVD reg32, xmmreg Double FADD 3
                                 1/1  1/1 */
  64,                  /* size of l1 cache.  */
  512,                 /* size of l2 cache.  */
  64,                  /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                 /* number of parallel prefetches */
  2,                   /* Branch cost */
  COSTS_N_INSNS (4),   /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),   /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),   /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),  /* cost of FSQRT instruction.  */

  amdfam10_memcpy,
  amdfam10_memset,
  4,                   /* scalar_stmt_cost.  */
  2,                   /* scalar load_cost.  */
  2,                   /* scalar_store_cost.  */
  6,                   /* vec_stmt_cost.  */
  0,                   /* vec_to_scalar_cost.  */
  2,                   /* scalar_to_vec_cost.  */
  2,                   /* vec_align_load_cost.  */
  2,                   /* vec_unalign_load_cost.  */
  2,                   /* vec_store_cost.  */
  2,                   /* cond_taken_branch_cost.  */
  1,                   /* cond_not_taken_branch_cost.  */
};
/* BDVER1 has optimized REP instruction for medium sized blocks, but for
   very small blocks it is better to use a loop.  For large blocks, libcall
   can do nontemporal accesses and beat inline considerably.  */
static stringop_algs bdver1_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs bdver1_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};

const struct processor_costs bdver1_cost = {
  COSTS_N_INSNS (1),   /* cost of an add instruction */
  COSTS_N_INSNS (1),   /* cost of a lea instruction */
  COSTS_N_INSNS (1),   /* variable shift costs */
  COSTS_N_INSNS (1),   /* constant shift costs */
  {COSTS_N_INSNS (4),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (4),  /* SI */
   COSTS_N_INSNS (6),  /* DI */
   COSTS_N_INSNS (6)}, /* other */
  0,                   /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35), /* HI */
   COSTS_N_INSNS (51), /* SI */
   COSTS_N_INSNS (83), /* DI */
   COSTS_N_INSNS (83)},/* other */
  COSTS_N_INSNS (1),   /* cost of movsx */
  COSTS_N_INSNS (1),   /* cost of movzx */
  8,                   /* "large" insn */
  9,                   /* MOVE_RATIO */
  4,                   /* cost for loading QImode using movzbl */
  {5, 5, 4},           /* cost of loading integer registers
                          in QImode, HImode and SImode.
                          Relative to reg-reg move (2).  */
  {4, 4, 4},           /* cost of storing integer registers */
  2,                   /* cost of reg,reg fld/fst */
  {5, 5, 12},          /* cost of loading fp registers
                          in SFmode, DFmode and XFmode */
  {4, 4, 8},           /* cost of storing fp registers
                          in SFmode, DFmode and XFmode */
  2,                   /* cost of moving MMX register */
  {4, 4},              /* cost of loading MMX registers
                          in SImode and DImode */
  {4, 4},              /* cost of storing MMX registers
                          in SImode and DImode */
  2,                   /* cost of moving SSE register */
  {4, 4, 4},           /* cost of loading SSE registers
                          in SImode, DImode and TImode */
  {4, 4, 4},           /* cost of storing SSE registers
                          in SImode, DImode and TImode */
  2,                   /* MMX or SSE register to integer */
  /* On K8:
      MOVD reg64, xmmreg Double FSTORE 4
      MOVD reg32, xmmreg Double FSTORE 4
     On AMDFAM10:
      MOVD reg64, xmmreg Double FADD 3
                                 1/1  1/1
      MOVD reg32, xmmreg Double FADD 3
                                 1/1  1/1 */
  16,                  /* size of l1 cache.  */
  2048,                /* size of l2 cache.  */
  64,                  /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                 /* number of parallel prefetches */
  2,                   /* Branch cost */
  COSTS_N_INSNS (6),   /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),   /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),   /* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),  /* cost of FSQRT instruction.  */

  bdver1_memcpy,
  bdver1_memset,
  6,                   /* scalar_stmt_cost.  */
  4,                   /* scalar load_cost.  */
  4,                   /* scalar_store_cost.  */
  6,                   /* vec_stmt_cost.  */
  0,                   /* vec_to_scalar_cost.  */
  2,                   /* scalar_to_vec_cost.  */
  4,                   /* vec_align_load_cost.  */
  4,                   /* vec_unalign_load_cost.  */
  4,                   /* vec_store_cost.  */
  2,                   /* cond_taken_branch_cost.  */
  1,                   /* cond_not_taken_branch_cost.  */
};
/* BDVER2 has optimized REP instruction for medium sized blocks, but for
   very small blocks it is better to use a loop.  For large blocks, libcall
   can do nontemporal accesses and beat inline considerably.  */

static stringop_algs bdver2_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs bdver2_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};

const struct processor_costs bdver2_cost = {
  COSTS_N_INSNS (1),   /* cost of an add instruction */
  COSTS_N_INSNS (1),   /* cost of a lea instruction */
  COSTS_N_INSNS (1),   /* variable shift costs */
  COSTS_N_INSNS (1),   /* constant shift costs */
  {COSTS_N_INSNS (4),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (4),  /* SI */
   COSTS_N_INSNS (6),  /* DI */
   COSTS_N_INSNS (6)}, /* other */
  0,                   /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35), /* HI */
   COSTS_N_INSNS (51), /* SI */
   COSTS_N_INSNS (83), /* DI */
   COSTS_N_INSNS (83)},/* other */
  COSTS_N_INSNS (1),   /* cost of movsx */
  COSTS_N_INSNS (1),   /* cost of movzx */
  8,                   /* "large" insn */
  9,                   /* MOVE_RATIO */
  4,                   /* cost for loading QImode using movzbl */
  {5, 5, 4},           /* cost of loading integer registers
                          in QImode, HImode and SImode.
                          Relative to reg-reg move (2).  */
  {4, 4, 4},           /* cost of storing integer registers */
  2,                   /* cost of reg,reg fld/fst */
  {5, 5, 12},          /* cost of loading fp registers
                          in SFmode, DFmode and XFmode */
  {4, 4, 8},           /* cost of storing fp registers
                          in SFmode, DFmode and XFmode */
  2,                   /* cost of moving MMX register */
  {4, 4},              /* cost of loading MMX registers
                          in SImode and DImode */
  {4, 4},              /* cost of storing MMX registers
                          in SImode and DImode */
  2,                   /* cost of moving SSE register */
  {4, 4, 4},           /* cost of loading SSE registers
                          in SImode, DImode and TImode */
  {4, 4, 4},           /* cost of storing SSE registers
                          in SImode, DImode and TImode */
  2,                   /* MMX or SSE register to integer */
  /* On K8:
      MOVD reg64, xmmreg Double FSTORE 4
      MOVD reg32, xmmreg Double FSTORE 4
     On AMDFAM10:
      MOVD reg64, xmmreg Double FADD 3
                                 1/1  1/1
      MOVD reg32, xmmreg Double FADD 3
                                 1/1  1/1 */
  16,                  /* size of l1 cache.  */
  2048,                /* size of l2 cache.  */
  64,                  /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                 /* number of parallel prefetches */
  2,                   /* Branch cost */
  COSTS_N_INSNS (6),   /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),   /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),   /* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),  /* cost of FSQRT instruction.  */

  bdver2_memcpy,
  bdver2_memset,
  6,                   /* scalar_stmt_cost.  */
  4,                   /* scalar load_cost.  */
  4,                   /* scalar_store_cost.  */
  6,                   /* vec_stmt_cost.  */
  0,                   /* vec_to_scalar_cost.  */
  2,                   /* scalar_to_vec_cost.  */
  4,                   /* vec_align_load_cost.  */
  4,                   /* vec_unalign_load_cost.  */
  4,                   /* vec_store_cost.  */
  2,                   /* cond_taken_branch_cost.  */
  1,                   /* cond_not_taken_branch_cost.  */
};
/* BDVER3 has optimized REP instruction for medium sized blocks, but for
   very small blocks it is better to use a loop.  For large blocks, libcall
   can do nontemporal accesses and beat inline considerably.  */
static stringop_algs bdver3_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs bdver3_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
struct processor_costs bdver3_cost = {
  COSTS_N_INSNS (1),   /* cost of an add instruction */
  COSTS_N_INSNS (1),   /* cost of a lea instruction */
  COSTS_N_INSNS (1),   /* variable shift costs */
  COSTS_N_INSNS (1),   /* constant shift costs */
  {COSTS_N_INSNS (4),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (4),  /* SI */
   COSTS_N_INSNS (6),  /* DI */
   COSTS_N_INSNS (6)}, /* other */
  0,                   /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35), /* HI */
   COSTS_N_INSNS (51), /* SI */
   COSTS_N_INSNS (83), /* DI */
   COSTS_N_INSNS (83)},/* other */
  COSTS_N_INSNS (1),   /* cost of movsx */
  COSTS_N_INSNS (1),   /* cost of movzx */
  8,                   /* "large" insn */
  9,                   /* MOVE_RATIO */
  4,                   /* cost for loading QImode using movzbl */
  {5, 5, 4},           /* cost of loading integer registers
                          in QImode, HImode and SImode.
                          Relative to reg-reg move (2).  */
  {4, 4, 4},           /* cost of storing integer registers */
  2,                   /* cost of reg,reg fld/fst */
  {5, 5, 12},          /* cost of loading fp registers
                          in SFmode, DFmode and XFmode */
  {4, 4, 8},           /* cost of storing fp registers
                          in SFmode, DFmode and XFmode */
  2,                   /* cost of moving MMX register */
  {4, 4},              /* cost of loading MMX registers
                          in SImode and DImode */
  {4, 4},              /* cost of storing MMX registers
                          in SImode and DImode */
  2,                   /* cost of moving SSE register */
  {4, 4, 4},           /* cost of loading SSE registers
                          in SImode, DImode and TImode */
  {4, 4, 4},           /* cost of storing SSE registers
                          in SImode, DImode and TImode */
  2,                   /* MMX or SSE register to integer */
  16,                  /* size of l1 cache.  */
  2048,                /* size of l2 cache.  */
  64,                  /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                 /* number of parallel prefetches */
  2,                   /* Branch cost */
  COSTS_N_INSNS (6),   /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),   /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),   /* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),  /* cost of FSQRT instruction.  */

  bdver3_memcpy,
  bdver3_memset,
  6,                   /* scalar_stmt_cost.  */
  4,                   /* scalar load_cost.  */
  4,                   /* scalar_store_cost.  */
  6,                   /* vec_stmt_cost.  */
  0,                   /* vec_to_scalar_cost.  */
  2,                   /* scalar_to_vec_cost.  */
  4,                   /* vec_align_load_cost.  */
  4,                   /* vec_unalign_load_cost.  */
  4,                   /* vec_store_cost.  */
  2,                   /* cond_taken_branch_cost.  */
  1,                   /* cond_not_taken_branch_cost.  */
};
/* BDVER4 has optimized REP instruction for medium sized blocks, but for
   very small blocks it is better to use a loop.  For large blocks, libcall
   can do nontemporal accesses and beat inline considerably.  */
static stringop_algs bdver4_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs bdver4_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
struct processor_costs bdver4_cost = {
  COSTS_N_INSNS (1),   /* cost of an add instruction */
  COSTS_N_INSNS (1),   /* cost of a lea instruction */
  COSTS_N_INSNS (1),   /* variable shift costs */
  COSTS_N_INSNS (1),   /* constant shift costs */
  {COSTS_N_INSNS (4),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (4),  /* SI */
   COSTS_N_INSNS (6),  /* DI */
   COSTS_N_INSNS (6)}, /* other */
  0,                   /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35), /* HI */
   COSTS_N_INSNS (51), /* SI */
   COSTS_N_INSNS (83), /* DI */
   COSTS_N_INSNS (83)},/* other */
  COSTS_N_INSNS (1),   /* cost of movsx */
  COSTS_N_INSNS (1),   /* cost of movzx */
  8,                   /* "large" insn */
  9,                   /* MOVE_RATIO */
  4,                   /* cost for loading QImode using movzbl */
  {5, 5, 4},           /* cost of loading integer registers
                          in QImode, HImode and SImode.
                          Relative to reg-reg move (2).  */
  {4, 4, 4},           /* cost of storing integer registers */
  2,                   /* cost of reg,reg fld/fst */
  {5, 5, 12},          /* cost of loading fp registers
                          in SFmode, DFmode and XFmode */
  {4, 4, 8},           /* cost of storing fp registers
                          in SFmode, DFmode and XFmode */
  2,                   /* cost of moving MMX register */
  {4, 4},              /* cost of loading MMX registers
                          in SImode and DImode */
  {4, 4},              /* cost of storing MMX registers
                          in SImode and DImode */
  2,                   /* cost of moving SSE register */
  {4, 4, 4},           /* cost of loading SSE registers
                          in SImode, DImode and TImode */
  {4, 4, 4},           /* cost of storing SSE registers
                          in SImode, DImode and TImode */
  2,                   /* MMX or SSE register to integer */
  16,                  /* size of l1 cache.  */
  2048,                /* size of l2 cache.  */
  64,                  /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                 /* number of parallel prefetches */
  2,                   /* Branch cost */
  COSTS_N_INSNS (6),   /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),   /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),   /* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),  /* cost of FSQRT instruction.  */

  bdver4_memcpy,
  bdver4_memset,
  6,                   /* scalar_stmt_cost.  */
  4,                   /* scalar load_cost.  */
  4,                   /* scalar_store_cost.  */
  6,                   /* vec_stmt_cost.  */
  0,                   /* vec_to_scalar_cost.  */
  2,                   /* scalar_to_vec_cost.  */
  4,                   /* vec_align_load_cost.  */
  4,                   /* vec_unalign_load_cost.  */
  4,                   /* vec_store_cost.  */
  2,                   /* cond_taken_branch_cost.  */
  1,                   /* cond_not_taken_branch_cost.  */
};
/* BTVER1 has optimized REP instruction for medium sized blocks, but for
   very small blocks it is better to use a loop.  For large blocks, libcall
   can do nontemporal accesses and beat inline considerably.  */
static stringop_algs btver1_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs btver1_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
const struct processor_costs btver1_cost = {
  COSTS_N_INSNS (1),   /* cost of an add instruction */
  COSTS_N_INSNS (2),   /* cost of a lea instruction */
  COSTS_N_INSNS (1),   /* variable shift costs */
  COSTS_N_INSNS (1),   /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (3),  /* SI */
   COSTS_N_INSNS (4),  /* DI */
   COSTS_N_INSNS (5)}, /* other */
  0,                   /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35), /* HI */
   COSTS_N_INSNS (51), /* SI */
   COSTS_N_INSNS (83), /* DI */
   COSTS_N_INSNS (83)},/* other */
  COSTS_N_INSNS (1),   /* cost of movsx */
  COSTS_N_INSNS (1),   /* cost of movzx */
  8,                   /* "large" insn */
  9,                   /* MOVE_RATIO */
  4,                   /* cost for loading QImode using movzbl */
  {3, 4, 3},           /* cost of loading integer registers
                          in QImode, HImode and SImode.
                          Relative to reg-reg move (2).  */
  {3, 4, 3},           /* cost of storing integer registers */
  4,                   /* cost of reg,reg fld/fst */
  {4, 4, 12},          /* cost of loading fp registers
                          in SFmode, DFmode and XFmode */
  {6, 6, 8},           /* cost of storing fp registers
                          in SFmode, DFmode and XFmode */
  2,                   /* cost of moving MMX register */
  {3, 3},              /* cost of loading MMX registers
                          in SImode and DImode */
  {4, 4},              /* cost of storing MMX registers
                          in SImode and DImode */
  2,                   /* cost of moving SSE register */
  {4, 4, 3},           /* cost of loading SSE registers
                          in SImode, DImode and TImode */
  {4, 4, 5},           /* cost of storing SSE registers
                          in SImode, DImode and TImode */
  3,                   /* MMX or SSE register to integer */
  /* On K8:
      MOVD reg64, xmmreg Double FSTORE 4
      MOVD reg32, xmmreg Double FSTORE 4
     On AMDFAM10:
      MOVD reg64, xmmreg Double FADD 3
                                 1/1  1/1
      MOVD reg32, xmmreg Double FADD 3
                                 1/1  1/1 */
  32,                  /* size of l1 cache.  */
  512,                 /* size of l2 cache.  */
  64,                  /* size of prefetch block */
  100,                 /* number of parallel prefetches */
  2,                   /* Branch cost */
  COSTS_N_INSNS (4),   /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),   /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),   /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),  /* cost of FSQRT instruction.  */

  btver1_memcpy,
  btver1_memset,
  4,                   /* scalar_stmt_cost.  */
  2,                   /* scalar load_cost.  */
  2,                   /* scalar_store_cost.  */
  6,                   /* vec_stmt_cost.  */
  0,                   /* vec_to_scalar_cost.  */
  2,                   /* scalar_to_vec_cost.  */
  2,                   /* vec_align_load_cost.  */
  2,                   /* vec_unalign_load_cost.  */
  2,                   /* vec_store_cost.  */
  2,                   /* cond_taken_branch_cost.  */
  1,                   /* cond_not_taken_branch_cost.  */
};
1359 static stringop_algs btver2_memcpy[2] = {
1360 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1361 {-1, rep_prefix_4_byte, false}}},
1362 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1363 {-1, libcall, false}}}};
1364 static stringop_algs btver2_memset[2] = {
1365 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1366 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1367 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1368 {-1, libcall, false}}}};
1369 const struct processor_costs btver2_cost = {
1370 COSTS_N_INSNS (1), /* cost of an add instruction */
1371 COSTS_N_INSNS (2), /* cost of a lea instruction */
1372 COSTS_N_INSNS (1), /* variable shift costs */
1373 COSTS_N_INSNS (1), /* constant shift costs */
1374 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1375 COSTS_N_INSNS (4), /* HI */
1376 COSTS_N_INSNS (3), /* SI */
1377 COSTS_N_INSNS (4), /* DI */
1378 COSTS_N_INSNS (5)}, /* other */
1379 0, /* cost of multiply per each bit set */
1380 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1381 COSTS_N_INSNS (35), /* HI */
1382 COSTS_N_INSNS (51), /* SI */
1383 COSTS_N_INSNS (83), /* DI */
1384 COSTS_N_INSNS (83)}, /* other */
1385 COSTS_N_INSNS (1), /* cost of movsx */
1386 COSTS_N_INSNS (1), /* cost of movzx */
1387 8, /* "large" insn */
1388 9, /* MOVE_RATIO */
1389 4, /* cost for loading QImode using movzbl */
1390 {3, 4, 3}, /* cost of loading integer registers
1391 in QImode, HImode and SImode.
1392 Relative to reg-reg move (2). */
1393 {3, 4, 3}, /* cost of storing integer registers */
1394 4, /* cost of reg,reg fld/fst */
1395 {4, 4, 12}, /* cost of loading fp registers
1396 in SFmode, DFmode and XFmode */
1397 {6, 6, 8}, /* cost of storing fp registers
1398 in SFmode, DFmode and XFmode */
1399 2, /* cost of moving MMX register */
1400 {3, 3}, /* cost of loading MMX registers
1401 in SImode and DImode */
1402 {4, 4}, /* cost of storing MMX registers
1403 in SImode and DImode */
1404 2, /* cost of moving SSE register */
1405 {4, 4, 3}, /* cost of loading SSE registers
1406 in SImode, DImode and TImode */
1407 {4, 4, 5}, /* cost of storing SSE registers
1408 in SImode, DImode and TImode */
1409 3, /* MMX or SSE register to integer */
1410 /* On K8:
1411 MOVD reg64, xmmreg Double FSTORE 4
1412 MOVD reg32, xmmreg Double FSTORE 4
1413 On AMDFAM10:
1414 MOVD reg64, xmmreg Double FADD 3
1415 1/1 1/1
1416 MOVD reg32, xmmreg Double FADD 3
1417 1/1 1/1 */
1418 32, /* size of l1 cache. */
1419 2048, /* size of l2 cache. */
1420 64, /* size of prefetch block */
1421 100, /* number of parallel prefetches */
1422 2, /* Branch cost */
1423 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1424 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1425 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1426 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1427 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1428 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1429 btver2_memcpy,
1430 btver2_memset,
1431 4, /* scalar_stmt_cost. */
1432 2, /* scalar_load_cost. */
1433 2, /* scalar_store_cost. */
1434 6, /* vec_stmt_cost. */
1435 0, /* vec_to_scalar_cost. */
1436 2, /* scalar_to_vec_cost. */
1437 2, /* vec_align_load_cost. */
1438 2, /* vec_unalign_load_cost. */
1439 2, /* vec_store_cost. */
1440 2, /* cond_taken_branch_cost. */
1441 1, /* cond_not_taken_branch_cost. */
1444 static stringop_algs pentium4_memcpy[2] = {
1445 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1446 DUMMY_STRINGOP_ALGS};
1447 static stringop_algs pentium4_memset[2] = {
1448 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1449 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1450 DUMMY_STRINGOP_ALGS};
1452 static const
1453 struct processor_costs pentium4_cost = {
1454 COSTS_N_INSNS (1), /* cost of an add instruction */
1455 COSTS_N_INSNS (3), /* cost of a lea instruction */
1456 COSTS_N_INSNS (4), /* variable shift costs */
1457 COSTS_N_INSNS (4), /* constant shift costs */
1458 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1459 COSTS_N_INSNS (15), /* HI */
1460 COSTS_N_INSNS (15), /* SI */
1461 COSTS_N_INSNS (15), /* DI */
1462 COSTS_N_INSNS (15)}, /* other */
1463 0, /* cost of multiply per each bit set */
1464 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1465 COSTS_N_INSNS (56), /* HI */
1466 COSTS_N_INSNS (56), /* SI */
1467 COSTS_N_INSNS (56), /* DI */
1468 COSTS_N_INSNS (56)}, /* other */
1469 COSTS_N_INSNS (1), /* cost of movsx */
1470 COSTS_N_INSNS (1), /* cost of movzx */
1471 16, /* "large" insn */
1472 6, /* MOVE_RATIO */
1473 2, /* cost for loading QImode using movzbl */
1474 {4, 5, 4}, /* cost of loading integer registers
1475 in QImode, HImode and SImode.
1476 Relative to reg-reg move (2). */
1477 {2, 3, 2}, /* cost of storing integer registers */
1478 2, /* cost of reg,reg fld/fst */
1479 {2, 2, 6}, /* cost of loading fp registers
1480 in SFmode, DFmode and XFmode */
1481 {4, 4, 6}, /* cost of storing fp registers
1482 in SFmode, DFmode and XFmode */
1483 2, /* cost of moving MMX register */
1484 {2, 2}, /* cost of loading MMX registers
1485 in SImode and DImode */
1486 {2, 2}, /* cost of storing MMX registers
1487 in SImode and DImode */
1488 12, /* cost of moving SSE register */
1489 {12, 12, 12}, /* cost of loading SSE registers
1490 in SImode, DImode and TImode */
1491 {2, 2, 8}, /* cost of storing SSE registers
1492 in SImode, DImode and TImode */
1493 10, /* MMX or SSE register to integer */
1494 8, /* size of l1 cache. */
1495 256, /* size of l2 cache. */
1496 64, /* size of prefetch block */
1497 6, /* number of parallel prefetches */
1498 2, /* Branch cost */
1499 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1500 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1501 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1502 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1503 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1504 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1505 pentium4_memcpy,
1506 pentium4_memset,
1507 1, /* scalar_stmt_cost. */
1508 1, /* scalar_load_cost. */
1509 1, /* scalar_store_cost. */
1510 1, /* vec_stmt_cost. */
1511 1, /* vec_to_scalar_cost. */
1512 1, /* scalar_to_vec_cost. */
1513 1, /* vec_align_load_cost. */
1514 2, /* vec_unalign_load_cost. */
1515 1, /* vec_store_cost. */
1516 3, /* cond_taken_branch_cost. */
1517 1, /* cond_not_taken_branch_cost. */
1520 static stringop_algs nocona_memcpy[2] = {
1521 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1522 {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
1523 {100000, unrolled_loop, false}, {-1, libcall, false}}}};
1525 static stringop_algs nocona_memset[2] = {
1526 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1527 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1528 {libcall, {{24, loop, false}, {64, unrolled_loop, false},
1529 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1531 static const
1532 struct processor_costs nocona_cost = {
1533 COSTS_N_INSNS (1), /* cost of an add instruction */
1534 COSTS_N_INSNS (1), /* cost of a lea instruction */
1535 COSTS_N_INSNS (1), /* variable shift costs */
1536 COSTS_N_INSNS (1), /* constant shift costs */
1537 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1538 COSTS_N_INSNS (10), /* HI */
1539 COSTS_N_INSNS (10), /* SI */
1540 COSTS_N_INSNS (10), /* DI */
1541 COSTS_N_INSNS (10)}, /* other */
1542 0, /* cost of multiply per each bit set */
1543 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1544 COSTS_N_INSNS (66), /* HI */
1545 COSTS_N_INSNS (66), /* SI */
1546 COSTS_N_INSNS (66), /* DI */
1547 COSTS_N_INSNS (66)}, /* other */
1548 COSTS_N_INSNS (1), /* cost of movsx */
1549 COSTS_N_INSNS (1), /* cost of movzx */
1550 16, /* "large" insn */
1551 17, /* MOVE_RATIO */
1552 4, /* cost for loading QImode using movzbl */
1553 {4, 4, 4}, /* cost of loading integer registers
1554 in QImode, HImode and SImode.
1555 Relative to reg-reg move (2). */
1556 {4, 4, 4}, /* cost of storing integer registers */
1557 3, /* cost of reg,reg fld/fst */
1558 {12, 12, 12}, /* cost of loading fp registers
1559 in SFmode, DFmode and XFmode */
1560 {4, 4, 4}, /* cost of storing fp registers
1561 in SFmode, DFmode and XFmode */
1562 6, /* cost of moving MMX register */
1563 {12, 12}, /* cost of loading MMX registers
1564 in SImode and DImode */
1565 {12, 12}, /* cost of storing MMX registers
1566 in SImode and DImode */
1567 6, /* cost of moving SSE register */
1568 {12, 12, 12}, /* cost of loading SSE registers
1569 in SImode, DImode and TImode */
1570 {12, 12, 12}, /* cost of storing SSE registers
1571 in SImode, DImode and TImode */
1572 8, /* MMX or SSE register to integer */
1573 8, /* size of l1 cache. */
1574 1024, /* size of l2 cache. */
1575 64, /* size of prefetch block */
1576 8, /* number of parallel prefetches */
1577 1, /* Branch cost */
1578 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1579 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1580 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1581 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1582 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1583 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1584 nocona_memcpy,
1585 nocona_memset,
1586 1, /* scalar_stmt_cost. */
1587 1, /* scalar_load_cost. */
1588 1, /* scalar_store_cost. */
1589 1, /* vec_stmt_cost. */
1590 1, /* vec_to_scalar_cost. */
1591 1, /* scalar_to_vec_cost. */
1592 1, /* vec_align_load_cost. */
1593 2, /* vec_unalign_load_cost. */
1594 1, /* vec_store_cost. */
1595 3, /* cond_taken_branch_cost. */
1596 1, /* cond_not_taken_branch_cost. */
1599 static stringop_algs atom_memcpy[2] = {
1600 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1601 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1602 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1603 static stringop_algs atom_memset[2] = {
1604 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1605 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1606 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1607 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1608 static const
1609 struct processor_costs atom_cost = {
1610 COSTS_N_INSNS (1), /* cost of an add instruction */
1611 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1612 COSTS_N_INSNS (1), /* variable shift costs */
1613 COSTS_N_INSNS (1), /* constant shift costs */
1614 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1615 COSTS_N_INSNS (4), /* HI */
1616 COSTS_N_INSNS (3), /* SI */
1617 COSTS_N_INSNS (4), /* DI */
1618 COSTS_N_INSNS (2)}, /* other */
1619 0, /* cost of multiply per each bit set */
1620 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1621 COSTS_N_INSNS (26), /* HI */
1622 COSTS_N_INSNS (42), /* SI */
1623 COSTS_N_INSNS (74), /* DI */
1624 COSTS_N_INSNS (74)}, /* other */
1625 COSTS_N_INSNS (1), /* cost of movsx */
1626 COSTS_N_INSNS (1), /* cost of movzx */
1627 8, /* "large" insn */
1628 17, /* MOVE_RATIO */
1629 4, /* cost for loading QImode using movzbl */
1630 {4, 4, 4}, /* cost of loading integer registers
1631 in QImode, HImode and SImode.
1632 Relative to reg-reg move (2). */
1633 {4, 4, 4}, /* cost of storing integer registers */
1634 4, /* cost of reg,reg fld/fst */
1635 {12, 12, 12}, /* cost of loading fp registers
1636 in SFmode, DFmode and XFmode */
1637 {6, 6, 8}, /* cost of storing fp registers
1638 in SFmode, DFmode and XFmode */
1639 2, /* cost of moving MMX register */
1640 {8, 8}, /* cost of loading MMX registers
1641 in SImode and DImode */
1642 {8, 8}, /* cost of storing MMX registers
1643 in SImode and DImode */
1644 2, /* cost of moving SSE register */
1645 {8, 8, 8}, /* cost of loading SSE registers
1646 in SImode, DImode and TImode */
1647 {8, 8, 8}, /* cost of storing SSE registers
1648 in SImode, DImode and TImode */
1649 5, /* MMX or SSE register to integer */
1650 32, /* size of l1 cache. */
1651 256, /* size of l2 cache. */
1652 64, /* size of prefetch block */
1653 6, /* number of parallel prefetches */
1654 3, /* Branch cost */
1655 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1656 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1657 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1658 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1659 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1660 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1661 atom_memcpy,
1662 atom_memset,
1663 1, /* scalar_stmt_cost. */
1664 1, /* scalar_load_cost. */
1665 1, /* scalar_store_cost. */
1666 1, /* vec_stmt_cost. */
1667 1, /* vec_to_scalar_cost. */
1668 1, /* scalar_to_vec_cost. */
1669 1, /* vec_align_load_cost. */
1670 2, /* vec_unalign_load_cost. */
1671 1, /* vec_store_cost. */
1672 3, /* cond_taken_branch_cost. */
1673 1, /* cond_not_taken_branch_cost. */
1676 static stringop_algs slm_memcpy[2] = {
1677 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1678 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1679 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1680 static stringop_algs slm_memset[2] = {
1681 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1682 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1683 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1684 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1685 static const
1686 struct processor_costs slm_cost = {
1687 COSTS_N_INSNS (1), /* cost of an add instruction */
1688 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1689 COSTS_N_INSNS (1), /* variable shift costs */
1690 COSTS_N_INSNS (1), /* constant shift costs */
1691 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1692 COSTS_N_INSNS (3), /* HI */
1693 COSTS_N_INSNS (3), /* SI */
1694 COSTS_N_INSNS (4), /* DI */
1695 COSTS_N_INSNS (2)}, /* other */
1696 0, /* cost of multiply per each bit set */
1697 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1698 COSTS_N_INSNS (26), /* HI */
1699 COSTS_N_INSNS (42), /* SI */
1700 COSTS_N_INSNS (74), /* DI */
1701 COSTS_N_INSNS (74)}, /* other */
1702 COSTS_N_INSNS (1), /* cost of movsx */
1703 COSTS_N_INSNS (1), /* cost of movzx */
1704 8, /* "large" insn */
1705 17, /* MOVE_RATIO */
1706 4, /* cost for loading QImode using movzbl */
1707 {4, 4, 4}, /* cost of loading integer registers
1708 in QImode, HImode and SImode.
1709 Relative to reg-reg move (2). */
1710 {4, 4, 4}, /* cost of storing integer registers */
1711 4, /* cost of reg,reg fld/fst */
1712 {12, 12, 12}, /* cost of loading fp registers
1713 in SFmode, DFmode and XFmode */
1714 {6, 6, 8}, /* cost of storing fp registers
1715 in SFmode, DFmode and XFmode */
1716 2, /* cost of moving MMX register */
1717 {8, 8}, /* cost of loading MMX registers
1718 in SImode and DImode */
1719 {8, 8}, /* cost of storing MMX registers
1720 in SImode and DImode */
1721 2, /* cost of moving SSE register */
1722 {8, 8, 8}, /* cost of loading SSE registers
1723 in SImode, DImode and TImode */
1724 {8, 8, 8}, /* cost of storing SSE registers
1725 in SImode, DImode and TImode */
1726 5, /* MMX or SSE register to integer */
1727 32, /* size of l1 cache. */
1728 256, /* size of l2 cache. */
1729 64, /* size of prefetch block */
1730 6, /* number of parallel prefetches */
1731 3, /* Branch cost */
1732 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1733 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1734 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1735 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1736 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1737 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1738 slm_memcpy,
1739 slm_memset,
1740 1, /* scalar_stmt_cost. */
1741 1, /* scalar_load_cost. */
1742 1, /* scalar_store_cost. */
1743 1, /* vec_stmt_cost. */
1744 4, /* vec_to_scalar_cost. */
1745 1, /* scalar_to_vec_cost. */
1746 1, /* vec_align_load_cost. */
1747 2, /* vec_unalign_load_cost. */
1748 1, /* vec_store_cost. */
1749 3, /* cond_taken_branch_cost. */
1750 1, /* cond_not_taken_branch_cost. */
1753 static stringop_algs intel_memcpy[2] = {
1754 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1755 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1756 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1757 static stringop_algs intel_memset[2] = {
1758 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1759 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1760 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1761 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1762 static const
1763 struct processor_costs intel_cost = {
1764 COSTS_N_INSNS (1), /* cost of an add instruction */
1765 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1766 COSTS_N_INSNS (1), /* variable shift costs */
1767 COSTS_N_INSNS (1), /* constant shift costs */
1768 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1769 COSTS_N_INSNS (3), /* HI */
1770 COSTS_N_INSNS (3), /* SI */
1771 COSTS_N_INSNS (4), /* DI */
1772 COSTS_N_INSNS (2)}, /* other */
1773 0, /* cost of multiply per each bit set */
1774 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1775 COSTS_N_INSNS (26), /* HI */
1776 COSTS_N_INSNS (42), /* SI */
1777 COSTS_N_INSNS (74), /* DI */
1778 COSTS_N_INSNS (74)}, /* other */
1779 COSTS_N_INSNS (1), /* cost of movsx */
1780 COSTS_N_INSNS (1), /* cost of movzx */
1781 8, /* "large" insn */
1782 17, /* MOVE_RATIO */
1783 4, /* cost for loading QImode using movzbl */
1784 {4, 4, 4}, /* cost of loading integer registers
1785 in QImode, HImode and SImode.
1786 Relative to reg-reg move (2). */
1787 {4, 4, 4}, /* cost of storing integer registers */
1788 4, /* cost of reg,reg fld/fst */
1789 {12, 12, 12}, /* cost of loading fp registers
1790 in SFmode, DFmode and XFmode */
1791 {6, 6, 8}, /* cost of storing fp registers
1792 in SFmode, DFmode and XFmode */
1793 2, /* cost of moving MMX register */
1794 {8, 8}, /* cost of loading MMX registers
1795 in SImode and DImode */
1796 {8, 8}, /* cost of storing MMX registers
1797 in SImode and DImode */
1798 2, /* cost of moving SSE register */
1799 {8, 8, 8}, /* cost of loading SSE registers
1800 in SImode, DImode and TImode */
1801 {8, 8, 8}, /* cost of storing SSE registers
1802 in SImode, DImode and TImode */
1803 5, /* MMX or SSE register to integer */
1804 32, /* size of l1 cache. */
1805 256, /* size of l2 cache. */
1806 64, /* size of prefetch block */
1807 6, /* number of parallel prefetches */
1808 3, /* Branch cost */
1809 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1810 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1811 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1812 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1813 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1814 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1815 intel_memcpy,
1816 intel_memset,
1817 1, /* scalar_stmt_cost. */
1818 1, /* scalar_load_cost. */
1819 1, /* scalar_store_cost. */
1820 1, /* vec_stmt_cost. */
1821 4, /* vec_to_scalar_cost. */
1822 1, /* scalar_to_vec_cost. */
1823 1, /* vec_align_load_cost. */
1824 2, /* vec_unalign_load_cost. */
1825 1, /* vec_store_cost. */
1826 3, /* cond_taken_branch_cost. */
1827 1, /* cond_not_taken_branch_cost. */
1830 /* Generic should produce code tuned for Core-i7 (and newer chips)
1831 and btver1 (and newer chips). */
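/* Note: in each of these tables the first stringop_algs entry is used for
   32-bit code and the second for 64-bit code (the lookups below index by
   TARGET_64BIT); every {max_size, alg, noalign} triple covers sizes up to
   max_size, and a max_size of -1 means "no upper bound".  */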
1833 static stringop_algs generic_memcpy[2] = {
1834 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1835 {-1, libcall, false}}},
1836 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1837 {-1, libcall, false}}}};
1838 static stringop_algs generic_memset[2] = {
1839 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1840 {-1, libcall, false}}},
1841 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1842 {-1, libcall, false}}}};
1843 static const
1844 struct processor_costs generic_cost = {
1845 COSTS_N_INSNS (1), /* cost of an add instruction */
1846 /* On all chips taken into consideration lea is 2 cycles or more. With
1847 this cost, however, our current implementation of synth_mult results in
1848 the use of unnecessary temporary registers, causing regressions on several
1849 SPECfp benchmarks. */
1850 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1851 COSTS_N_INSNS (1), /* variable shift costs */
1852 COSTS_N_INSNS (1), /* constant shift costs */
1853 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1854 COSTS_N_INSNS (4), /* HI */
1855 COSTS_N_INSNS (3), /* SI */
1856 COSTS_N_INSNS (4), /* DI */
1857 COSTS_N_INSNS (2)}, /* other */
1858 0, /* cost of multiply per each bit set */
1859 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1860 COSTS_N_INSNS (26), /* HI */
1861 COSTS_N_INSNS (42), /* SI */
1862 COSTS_N_INSNS (74), /* DI */
1863 COSTS_N_INSNS (74)}, /* other */
1864 COSTS_N_INSNS (1), /* cost of movsx */
1865 COSTS_N_INSNS (1), /* cost of movzx */
1866 8, /* "large" insn */
1867 17, /* MOVE_RATIO */
1868 4, /* cost for loading QImode using movzbl */
1869 {4, 4, 4}, /* cost of loading integer registers
1870 in QImode, HImode and SImode.
1871 Relative to reg-reg move (2). */
1872 {4, 4, 4}, /* cost of storing integer registers */
1873 4, /* cost of reg,reg fld/fst */
1874 {12, 12, 12}, /* cost of loading fp registers
1875 in SFmode, DFmode and XFmode */
1876 {6, 6, 8}, /* cost of storing fp registers
1877 in SFmode, DFmode and XFmode */
1878 2, /* cost of moving MMX register */
1879 {8, 8}, /* cost of loading MMX registers
1880 in SImode and DImode */
1881 {8, 8}, /* cost of storing MMX registers
1882 in SImode and DImode */
1883 2, /* cost of moving SSE register */
1884 {8, 8, 8}, /* cost of loading SSE registers
1885 in SImode, DImode and TImode */
1886 {8, 8, 8}, /* cost of storing SSE registers
1887 in SImode, DImode and TImode */
1888 5, /* MMX or SSE register to integer */
1889 32, /* size of l1 cache. */
1890 512, /* size of l2 cache. */
1891 64, /* size of prefetch block */
1892 6, /* number of parallel prefetches */
1893 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1894 value is increased to the perhaps more appropriate value of 5. */
1895 3, /* Branch cost */
1896 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1897 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1898 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1899 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1900 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1901 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1902 generic_memcpy,
1903 generic_memset,
1904 1, /* scalar_stmt_cost. */
1905 1, /* scalar_load_cost. */
1906 1, /* scalar_store_cost. */
1907 1, /* vec_stmt_cost. */
1908 1, /* vec_to_scalar_cost. */
1909 1, /* scalar_to_vec_cost. */
1910 1, /* vec_align_load_cost. */
1911 2, /* vec_unalign_load_cost. */
1912 1, /* vec_store_cost. */
1913 3, /* cond_taken_branch_cost. */
1914 1, /* cond_not_taken_branch_cost. */
1917 /* core_cost should produce code tuned for the Core family of CPUs. */
1918 static stringop_algs core_memcpy[2] = {
1919 {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
1920 {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
1921 {-1, libcall, false}}}};
1922 static stringop_algs core_memset[2] = {
1923 {libcall, {{6, loop_1_byte, true},
1924 {24, loop, true},
1925 {8192, rep_prefix_4_byte, true},
1926 {-1, libcall, false}}},
1927 {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
1928 {-1, libcall, false}}}};
1930 static const
1931 struct processor_costs core_cost = {
1932 COSTS_N_INSNS (1), /* cost of an add instruction */
1933 /* On all chips taken into consideration lea is 2 cycles or more. With
1934 this cost, however, our current implementation of synth_mult results in
1935 the use of unnecessary temporary registers, causing regressions on several
1936 SPECfp benchmarks. */
1937 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1938 COSTS_N_INSNS (1), /* variable shift costs */
1939 COSTS_N_INSNS (1), /* constant shift costs */
1940 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1941 COSTS_N_INSNS (4), /* HI */
1942 COSTS_N_INSNS (3), /* SI */
1943 COSTS_N_INSNS (4), /* DI */
1944 COSTS_N_INSNS (2)}, /* other */
1945 0, /* cost of multiply per each bit set */
1946 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1947 COSTS_N_INSNS (26), /* HI */
1948 COSTS_N_INSNS (42), /* SI */
1949 COSTS_N_INSNS (74), /* DI */
1950 COSTS_N_INSNS (74)}, /* other */
1951 COSTS_N_INSNS (1), /* cost of movsx */
1952 COSTS_N_INSNS (1), /* cost of movzx */
1953 8, /* "large" insn */
1954 17, /* MOVE_RATIO */
1955 4, /* cost for loading QImode using movzbl */
1956 {4, 4, 4}, /* cost of loading integer registers
1957 in QImode, HImode and SImode.
1958 Relative to reg-reg move (2). */
1959 {4, 4, 4}, /* cost of storing integer registers */
1960 4, /* cost of reg,reg fld/fst */
1961 {12, 12, 12}, /* cost of loading fp registers
1962 in SFmode, DFmode and XFmode */
1963 {6, 6, 8}, /* cost of storing fp registers
1964 in SFmode, DFmode and XFmode */
1965 2, /* cost of moving MMX register */
1966 {8, 8}, /* cost of loading MMX registers
1967 in SImode and DImode */
1968 {8, 8}, /* cost of storing MMX registers
1969 in SImode and DImode */
1970 2, /* cost of moving SSE register */
1971 {8, 8, 8}, /* cost of loading SSE registers
1972 in SImode, DImode and TImode */
1973 {8, 8, 8}, /* cost of storing SSE registers
1974 in SImode, DImode and TImode */
1975 5, /* MMX or SSE register to integer */
1976 64, /* size of l1 cache. */
1977 512, /* size of l2 cache. */
1978 64, /* size of prefetch block */
1979 6, /* number of parallel prefetches */
1980 /* FIXME: perhaps a more appropriate value is 5. */
1981 3, /* Branch cost */
1982 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1983 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1984 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1985 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1986 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1987 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1988 core_memcpy,
1989 core_memset,
1990 1, /* scalar_stmt_cost. */
1991 1, /* scalar_load_cost. */
1992 1, /* scalar_store_cost. */
1993 1, /* vec_stmt_cost. */
1994 1, /* vec_to_scalar_cost. */
1995 1, /* scalar_to_vec_cost. */
1996 1, /* vec_align_load_cost. */
1997 2, /* vec_unalign_load_cost. */
1998 1, /* vec_store_cost. */
1999 3, /* cond_taken_branch_cost. */
2000 1, /* cond_not_taken_branch_cost. */
2004 /* Set by -mtune. */
2005 const struct processor_costs *ix86_tune_cost = &pentium_cost;
2007 /* Set by -mtune or -Os. */
2008 const struct processor_costs *ix86_cost = &pentium_cost;
2010 /* Processor feature/optimization bitmasks. */
2011 #define m_386 (1<<PROCESSOR_I386)
2012 #define m_486 (1<<PROCESSOR_I486)
2013 #define m_PENT (1<<PROCESSOR_PENTIUM)
2014 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
2015 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
2016 #define m_NOCONA (1<<PROCESSOR_NOCONA)
2017 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
2018 #define m_CORE2 (1<<PROCESSOR_CORE2)
2019 #define m_NEHALEM (1<<PROCESSOR_NEHALEM)
2020 #define m_SANDYBRIDGE (1<<PROCESSOR_SANDYBRIDGE)
2021 #define m_HASWELL (1<<PROCESSOR_HASWELL)
2022 #define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
2023 #define m_BONNELL (1<<PROCESSOR_BONNELL)
2024 #define m_SILVERMONT (1<<PROCESSOR_SILVERMONT)
2025 #define m_INTEL (1<<PROCESSOR_INTEL)
2027 #define m_GEODE (1<<PROCESSOR_GEODE)
2028 #define m_K6 (1<<PROCESSOR_K6)
2029 #define m_K6_GEODE (m_K6 | m_GEODE)
2030 #define m_K8 (1<<PROCESSOR_K8)
2031 #define m_ATHLON (1<<PROCESSOR_ATHLON)
2032 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
2033 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
2034 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
2035 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
2036 #define m_BDVER3 (1<<PROCESSOR_BDVER3)
2037 #define m_BDVER4 (1<<PROCESSOR_BDVER4)
2038 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
2039 #define m_BTVER2 (1<<PROCESSOR_BTVER2)
2040 #define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
2041 #define m_BTVER (m_BTVER1 | m_BTVER2)
2042 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)
2044 #define m_GENERIC (1<<PROCESSOR_GENERIC)
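/* Each of these m_* masks is a bit (or OR of bits) keyed by processor
   number; set_ix86_tune_features below tests them against
   (1u << ix86_tune), so the selectors in x86-tune.def are simply ORs of
   the processors a given tuning applies to.  */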
2046 const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
2047 #undef DEF_TUNE
2048 #define DEF_TUNE(tune, name, selector) name,
2049 #include "x86-tune.def"
2050 #undef DEF_TUNE
2053 /* Feature tests against the various tunings. */
2054 unsigned char ix86_tune_features[X86_TUNE_LAST];
2056 /* Feature tests against the various tunings used to create ix86_tune_features
2057 based on the processor mask. */
2058 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
2059 #undef DEF_TUNE
2060 #define DEF_TUNE(tune, name, selector) selector,
2061 #include "x86-tune.def"
2062 #undef DEF_TUNE
2065 /* Feature tests against the various architecture variations. */
2066 unsigned char ix86_arch_features[X86_ARCH_LAST];
2068 /* Feature tests against the various architecture variations, used to create
2069 ix86_arch_features based on the processor mask. */
2070 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2071 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
2072 ~(m_386 | m_486 | m_PENT | m_K6),
2074 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2075 ~m_386,
2077 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2078 ~(m_386 | m_486),
2080 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2081 ~m_386,
2083 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2084 ~m_386,
2087 /* If the average insn count for a single function invocation is
2088 lower than this constant, emit fast (but longer) prologue and
2089 epilogue code. */
2090 #define FAST_PROLOGUE_INSN_COUNT 20
2092 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
2093 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2094 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2095 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2097 /* Array of the smallest class containing reg number REGNO, indexed by
2098 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2100 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2102 /* ax, dx, cx, bx */
2103 AREG, DREG, CREG, BREG,
2104 /* si, di, bp, sp */
2105 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2106 /* FP registers */
2107 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2108 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2109 /* arg pointer */
2110 NON_Q_REGS,
2111 /* flags, fpsr, fpcr, frame */
2112 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2113 /* SSE registers */
2114 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2115 SSE_REGS, SSE_REGS,
2116 /* MMX registers */
2117 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2118 MMX_REGS, MMX_REGS,
2119 /* REX registers */
2120 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2121 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2122 /* SSE REX registers */
2123 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2124 SSE_REGS, SSE_REGS,
2125 /* AVX-512 SSE registers */
2126 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2127 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2128 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2129 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2130 /* Mask registers. */
2131 MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2132 MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2135 /* The "default" register map used in 32bit mode. */
2137 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2139 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2140 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2141 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2142 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2143 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2144 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2145 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2146 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2147 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2148 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2151 /* The "default" register map used in 64bit mode. */
2153 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2155 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2156 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2157 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2158 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2159 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2160 8,9,10,11,12,13,14,15, /* extended integer registers */
2161 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2162 67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */
2163 75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */
2164 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
2167 /* Define the register numbers to be used in Dwarf debugging information.
2168 The SVR4 reference port C compiler uses the following register numbers
2169 in its Dwarf output code:
2170 0 for %eax (gcc regno = 0)
2171 1 for %ecx (gcc regno = 2)
2172 2 for %edx (gcc regno = 1)
2173 3 for %ebx (gcc regno = 3)
2174 4 for %esp (gcc regno = 7)
2175 5 for %ebp (gcc regno = 6)
2176 6 for %esi (gcc regno = 4)
2177 7 for %edi (gcc regno = 5)
2178 The following three DWARF register numbers are never generated by
2179 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2180 believes these numbers have these meanings.
2181 8 for %eip (no gcc equivalent)
2182 9 for %eflags (gcc regno = 17)
2183 10 for %trapno (no gcc equivalent)
2184 It is not at all clear how we should number the FP stack registers
2185 for the x86 architecture. If the version of SDB on x86/svr4 were
2186 a bit less brain dead with respect to floating-point then we would
2187 have a precedent to follow with respect to DWARF register numbers
2188 for x86 FP registers, but the SDB on x86/svr4 is so completely
2189 broken with respect to FP registers that it is hardly worth thinking
2190 of it as something to strive for compatibility with.
2191 The version of x86/svr4 SDB I have at the moment does (partially)
2192 seem to believe that DWARF register number 11 is associated with
2193 the x86 register %st(0), but that's about all. Higher DWARF
2194 register numbers don't seem to be associated with anything in
2195 particular, and even for DWARF regno 11, SDB only seems to under-
2196 stand that it should say that a variable lives in %st(0) (when
2197 asked via an `=' command) if we said it was in DWARF regno 11,
2198 but SDB still prints garbage when asked for the value of the
2199 variable in question (via a `/' command).
2200 (Also note that the labels SDB prints for various FP stack regs
2201 when doing an `x' command are all wrong.)
2202 Note that these problems generally don't affect the native SVR4
2203 C compiler because it doesn't allow the use of -O with -g and
2204 because when it is *not* optimizing, it allocates a memory
2205 location for each floating-point variable, and the memory
2206 location is what gets described in the DWARF AT_location
2207 attribute for the variable in question.
2208 Regardless of the severe mental illness of the x86/svr4 SDB, we
2209 do something sensible here and we use the following DWARF
2210 register numbers. Note that these are all stack-top-relative
2211 numbers.
2212 11 for %st(0) (gcc regno = 8)
2213 12 for %st(1) (gcc regno = 9)
2214 13 for %st(2) (gcc regno = 10)
2215 14 for %st(3) (gcc regno = 11)
2216 15 for %st(4) (gcc regno = 12)
2217 16 for %st(5) (gcc regno = 13)
2218 17 for %st(6) (gcc regno = 14)
2219 18 for %st(7) (gcc regno = 15)
2221 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2223 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2224 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2225 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2226 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2227 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2228 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2229 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2230 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2231 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2232 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2235 /* Define parameter passing and return registers. */
2237 static int const x86_64_int_parameter_registers[6] =
2239 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2242 static int const x86_64_ms_abi_int_parameter_registers[4] =
2244 CX_REG, DX_REG, R8_REG, R9_REG
2247 static int const x86_64_int_return_registers[4] =
2249 AX_REG, DX_REG, DI_REG, SI_REG
2252 /* Additional registers that are clobbered by SYSV calls. */
2254 int const x86_64_ms_sysv_extra_clobbered_registers[12] =
2256 SI_REG, DI_REG,
2257 XMM6_REG, XMM7_REG,
2258 XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
2259 XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
2262 /* Define the structure for the machine field in struct function. */
2264 struct GTY(()) stack_local_entry {
2265 unsigned short mode;
2266 unsigned short n;
2267 rtx rtl;
2268 struct stack_local_entry *next;
2271 /* Structure describing stack frame layout.
2272 Stack grows downward:
2274 [arguments]
2275 <- ARG_POINTER
2276 saved pc
2278 saved static chain if ix86_static_chain_on_stack
2280 saved frame pointer if frame_pointer_needed
2281 <- HARD_FRAME_POINTER
2282 [saved regs]
2283 <- regs_save_offset
2284 [padding0]
2286 [saved SSE regs]
2287 <- sse_regs_save_offset
2288 [padding1] |
2289 | <- FRAME_POINTER
2290 [va_arg registers] |
2292 [frame] |
2294 [padding2] | = to_allocate
2295 <- STACK_POINTER
2297 struct ix86_frame
2299 int nsseregs;
2300 int nregs;
2301 int va_arg_size;
2302 int red_zone_size;
2303 int outgoing_arguments_size;
2305 /* The offsets relative to ARG_POINTER. */
2306 HOST_WIDE_INT frame_pointer_offset;
2307 HOST_WIDE_INT hard_frame_pointer_offset;
2308 HOST_WIDE_INT stack_pointer_offset;
2309 HOST_WIDE_INT hfp_save_offset;
2310 HOST_WIDE_INT reg_save_offset;
2311 HOST_WIDE_INT sse_reg_save_offset;
2313 /* When save_regs_using_mov is set, emit prologue using
2314 move instead of push instructions. */
2315 bool save_regs_using_mov;
2318 /* Which cpu are we scheduling for. */
2319 enum attr_cpu ix86_schedule;
2321 /* Which cpu are we optimizing for. */
2322 enum processor_type ix86_tune;
2324 /* Which instruction set architecture to use. */
2325 enum processor_type ix86_arch;
2327 /* True if processor has SSE prefetch instruction. */
2328 unsigned char x86_prefetch_sse;
2330 /* -mstackrealign option */
2331 static const char ix86_force_align_arg_pointer_string[]
2332 = "force_align_arg_pointer";
2334 static rtx (*ix86_gen_leave) (void);
2335 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2336 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2337 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2338 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2339 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2340 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2341 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2342 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2343 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2344 static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
2345 static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
2347 /* Preferred alignment for stack boundary in bits. */
2348 unsigned int ix86_preferred_stack_boundary;
2350 /* Alignment for incoming stack boundary in bits specified at
2351 command line. */
2352 static unsigned int ix86_user_incoming_stack_boundary;
2354 /* Default alignment for incoming stack boundary in bits. */
2355 static unsigned int ix86_default_incoming_stack_boundary;
2357 /* Alignment for incoming stack boundary in bits. */
2358 unsigned int ix86_incoming_stack_boundary;
2360 /* Calling abi specific va_list type nodes. */
2361 static GTY(()) tree sysv_va_list_type_node;
2362 static GTY(()) tree ms_va_list_type_node;
2364 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2365 char internal_label_prefix[16];
2366 int internal_label_prefix_len;
2368 /* Fence to use after loop using movnt. */
2369 tree x86_mfence;
2371 /* Register class used for passing a given 64-bit part of the argument.
2372 These represent classes as documented by the psABI, with the exception
2373 of the SSESF and SSEDF classes, which are basically the SSE class; gcc just
2374 uses an SFmode or DFmode move instead of DImode to avoid reformatting penalties.
2376 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2377 whenever possible (the upper half does contain padding). */
2378 enum x86_64_reg_class
2380 X86_64_NO_CLASS,
2381 X86_64_INTEGER_CLASS,
2382 X86_64_INTEGERSI_CLASS,
2383 X86_64_SSE_CLASS,
2384 X86_64_SSESF_CLASS,
2385 X86_64_SSEDF_CLASS,
2386 X86_64_SSEUP_CLASS,
2387 X86_64_X87_CLASS,
2388 X86_64_X87UP_CLASS,
2389 X86_64_COMPLEX_X87_CLASS,
2390 X86_64_MEMORY_CLASS
2393 #define MAX_CLASSES 8
2395 /* Table of constants used by fldpi, fldln2, etc.... */
2396 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2397 static bool ext_80387_constants_init = 0;
2400 static struct machine_function * ix86_init_machine_status (void);
2401 static rtx ix86_function_value (const_tree, const_tree, bool);
2402 static bool ix86_function_value_regno_p (const unsigned int);
2403 static unsigned int ix86_function_arg_boundary (enum machine_mode,
2404 const_tree);
2405 static rtx ix86_static_chain (const_tree, bool);
2406 static int ix86_function_regparm (const_tree, const_tree);
2407 static void ix86_compute_frame_layout (struct ix86_frame *);
2408 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
2409 rtx, rtx, int);
2410 static void ix86_add_new_builtins (HOST_WIDE_INT);
2411 static tree ix86_canonical_va_list_type (tree);
2412 static void predict_jump (int);
2413 static unsigned int split_stack_prologue_scratch_regno (void);
2414 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2416 enum ix86_function_specific_strings
2418 IX86_FUNCTION_SPECIFIC_ARCH,
2419 IX86_FUNCTION_SPECIFIC_TUNE,
2420 IX86_FUNCTION_SPECIFIC_MAX
2423 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2424 const char *, enum fpmath_unit, bool);
2425 static void ix86_function_specific_save (struct cl_target_option *,
2426 struct gcc_options *opts);
2427 static void ix86_function_specific_restore (struct gcc_options *opts,
2428 struct cl_target_option *);
2429 static void ix86_function_specific_print (FILE *, int,
2430 struct cl_target_option *);
2431 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2432 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2433 struct gcc_options *,
2434 struct gcc_options *,
2435 struct gcc_options *);
2436 static bool ix86_can_inline_p (tree, tree);
2437 static void ix86_set_current_function (tree);
2438 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2440 static enum calling_abi ix86_function_abi (const_tree);
2443 #ifndef SUBTARGET32_DEFAULT_CPU
2444 #define SUBTARGET32_DEFAULT_CPU "i386"
2445 #endif
2447 /* Whether -mtune= or -march= were specified */
2448 static int ix86_tune_defaulted;
2449 static int ix86_arch_specified;
2451 /* Vectorization library interface and handlers. */
2452 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2454 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2455 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2457 /* Processor target table, indexed by processor number */
2458 struct ptt
2460 const char *const name; /* processor name */
2461 const struct processor_costs *cost; /* Processor costs */
2462 const int align_loop; /* Default alignments. */
2463 const int align_loop_max_skip;
2464 const int align_jump;
2465 const int align_jump_max_skip;
2466 const int align_func;
2469 /* This table must be in sync with enum processor_type in i386.h. */
2470 static const struct ptt processor_target_table[PROCESSOR_max] =
2472 {"generic", &generic_cost, 16, 10, 16, 10, 16},
2473 {"i386", &i386_cost, 4, 3, 4, 3, 4},
2474 {"i486", &i486_cost, 16, 15, 16, 15, 16},
2475 {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
2476 {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
2477 {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
2478 {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
2479 {"core2", &core_cost, 16, 10, 16, 10, 16},
2480 {"nehalem", &core_cost, 16, 10, 16, 10, 16},
2481 {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
2482 {"haswell", &core_cost, 16, 10, 16, 10, 16},
2483 {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
2484 {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
2485 {"intel", &intel_cost, 16, 15, 16, 7, 16},
2486 {"geode", &geode_cost, 0, 0, 0, 0, 0},
2487 {"k6", &k6_cost, 32, 7, 32, 7, 32},
2488 {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
2489 {"k8", &k8_cost, 16, 7, 16, 7, 16},
2490 {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
2491 {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
2492 {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
2493 {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
2494 {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
2495 {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
2496 {"btver2", &btver2_cost, 16, 10, 16, 7, 11}
2499 static unsigned int
2500 rest_of_handle_insert_vzeroupper (void)
2502 int i;
2504 /* vzeroupper instructions are inserted immediately after reload to
2505 account for possible spills from 256-bit registers. The pass
2506 reuses the mode switching infrastructure by re-running the mode
2507 insertion pass, so disable entities that have already been processed. */
2508 for (i = 0; i < MAX_386_ENTITIES; i++)
2509 ix86_optimize_mode_switching[i] = 0;
2511 ix86_optimize_mode_switching[AVX_U128] = 1;
2513 /* Call optimize_mode_switching. */
2514 g->get_passes ()->execute_pass_mode_switching ();
2515 return 0;
2518 namespace {
2520 const pass_data pass_data_insert_vzeroupper =
2522 RTL_PASS, /* type */
2523 "vzeroupper", /* name */
2524 OPTGROUP_NONE, /* optinfo_flags */
2525 TV_NONE, /* tv_id */
2526 0, /* properties_required */
2527 0, /* properties_provided */
2528 0, /* properties_destroyed */
2529 0, /* todo_flags_start */
2530 TODO_df_finish, /* todo_flags_finish */
2533 class pass_insert_vzeroupper : public rtl_opt_pass
2535 public:
2536 pass_insert_vzeroupper(gcc::context *ctxt)
2537 : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
2540 /* opt_pass methods: */
2541 virtual bool gate (function *)
2543 return TARGET_AVX && !TARGET_AVX512F && TARGET_VZEROUPPER;
2546 virtual unsigned int execute (function *)
2548 return rest_of_handle_insert_vzeroupper ();
2551 }; // class pass_insert_vzeroupper
2553 } // anon namespace
2555 rtl_opt_pass *
2556 make_pass_insert_vzeroupper (gcc::context *ctxt)
2558 return new pass_insert_vzeroupper (ctxt);
2561 /* Return true if a red-zone is in use. */
2563 static inline bool
2564 ix86_using_red_zone (void)
2566 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2569 /* Return a string that documents the current -m options. The caller is
2570 responsible for freeing the string. */
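/* For illustration only: the returned string might look something like
   "-march=x86-64 -mtune=generic -m64 -msse2 -mfpmath=sse", depending on
   which ISA and flag bits are set.  */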
2572 static char *
2573 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
2574 const char *tune, enum fpmath_unit fpmath,
2575 bool add_nl_p)
2577 struct ix86_target_opts
2579 const char *option; /* option string */
2580 HOST_WIDE_INT mask; /* isa mask options */
2583 /* This table is ordered so that options like -msse4.2 that imply
2584 other options are matched first. */
2585 static struct ix86_target_opts isa_opts[] =
2587 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2588 { "-mfma", OPTION_MASK_ISA_FMA },
2589 { "-mxop", OPTION_MASK_ISA_XOP },
2590 { "-mlwp", OPTION_MASK_ISA_LWP },
2591 { "-mavx512f", OPTION_MASK_ISA_AVX512F },
2592 { "-mavx512er", OPTION_MASK_ISA_AVX512ER },
2593 { "-mavx512cd", OPTION_MASK_ISA_AVX512CD },
2594 { "-mavx512pf", OPTION_MASK_ISA_AVX512PF },
2595 { "-mavx512dq", OPTION_MASK_ISA_AVX512DQ },
2596 { "-mavx512bw", OPTION_MASK_ISA_AVX512BW },
2597 { "-mavx512vl", OPTION_MASK_ISA_AVX512VL },
2598 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2599 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2600 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2601 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2602 { "-msse3", OPTION_MASK_ISA_SSE3 },
2603 { "-msse2", OPTION_MASK_ISA_SSE2 },
2604 { "-msse", OPTION_MASK_ISA_SSE },
2605 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2606 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2607 { "-mmmx", OPTION_MASK_ISA_MMX },
2608 { "-mabm", OPTION_MASK_ISA_ABM },
2609 { "-mbmi", OPTION_MASK_ISA_BMI },
2610 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2611 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2612 { "-mhle", OPTION_MASK_ISA_HLE },
2613 { "-mfxsr", OPTION_MASK_ISA_FXSR },
2614 { "-mrdseed", OPTION_MASK_ISA_RDSEED },
2615 { "-mprfchw", OPTION_MASK_ISA_PRFCHW },
2616 { "-madx", OPTION_MASK_ISA_ADX },
2617 { "-mtbm", OPTION_MASK_ISA_TBM },
2618 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2619 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2620 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2621 { "-maes", OPTION_MASK_ISA_AES },
2622 { "-msha", OPTION_MASK_ISA_SHA },
2623 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2624 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2625 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2626 { "-mf16c", OPTION_MASK_ISA_F16C },
2627 { "-mrtm", OPTION_MASK_ISA_RTM },
2628 { "-mxsave", OPTION_MASK_ISA_XSAVE },
2629 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT },
2630 { "-mprefetchwt1", OPTION_MASK_ISA_PREFETCHWT1 },
2631 { "-mclflushopt", OPTION_MASK_ISA_CLFLUSHOPT },
2632 { "-mxsavec", OPTION_MASK_ISA_XSAVEC },
2633 { "-mxsaves", OPTION_MASK_ISA_XSAVES },
2636 /* Flag options. */
2637 static struct ix86_target_opts flag_opts[] =
2639 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2640 { "-mlong-double-128", MASK_LONG_DOUBLE_128 },
2641 { "-mlong-double-64", MASK_LONG_DOUBLE_64 },
2642 { "-m80387", MASK_80387 },
2643 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2644 { "-malign-double", MASK_ALIGN_DOUBLE },
2645 { "-mcld", MASK_CLD },
2646 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2647 { "-mieee-fp", MASK_IEEE_FP },
2648 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2649 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2650 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2651 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2652 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2653 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2654 { "-mno-red-zone", MASK_NO_RED_ZONE },
2655 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2656 { "-mrecip", MASK_RECIP },
2657 { "-mrtd", MASK_RTD },
2658 { "-msseregparm", MASK_SSEREGPARM },
2659 { "-mstack-arg-probe", MASK_STACK_PROBE },
2660 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2661 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2662 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2663 { "-mvzeroupper", MASK_VZEROUPPER },
2664 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2665 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2666 { "-mprefer-avx128", MASK_PREFER_AVX128},
2669 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2671 char isa_other[40];
2672 char target_other[40];
2673 unsigned num = 0;
2674 unsigned i, j;
2675 char *ret;
2676 char *ptr;
2677 size_t len;
2678 size_t line_len;
2679 size_t sep_len;
2680 const char *abi;
2682 memset (opts, '\0', sizeof (opts));
2684 /* Add -march= option. */
2685 if (arch)
2687 opts[num][0] = "-march=";
2688 opts[num++][1] = arch;
2691 /* Add -mtune= option. */
2692 if (tune)
2694 opts[num][0] = "-mtune=";
2695 opts[num++][1] = tune;
2698 /* Add -m32/-m64/-mx32. */
2699 if ((isa & OPTION_MASK_ISA_64BIT) != 0)
2701 if ((isa & OPTION_MASK_ABI_64) != 0)
2702 abi = "-m64";
2703 else
2704 abi = "-mx32";
2705 isa &= ~ (OPTION_MASK_ISA_64BIT
2706 | OPTION_MASK_ABI_64
2707 | OPTION_MASK_ABI_X32);
2709 else
2710 abi = "-m32";
2711 opts[num++][0] = abi;
2713 /* Pick out the options in isa options. */
2714 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2716 if ((isa & isa_opts[i].mask) != 0)
2718 opts[num++][0] = isa_opts[i].option;
2719 isa &= ~ isa_opts[i].mask;
2723 if (isa && add_nl_p)
2725 opts[num++][0] = isa_other;
2726 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
2727 isa);
2730 /* Add flag options. */
2731 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2733 if ((flags & flag_opts[i].mask) != 0)
2735 opts[num++][0] = flag_opts[i].option;
2736 flags &= ~ flag_opts[i].mask;
2740 if (flags && add_nl_p)
2742 opts[num++][0] = target_other;
2743 sprintf (target_other, "(other flags: %#x)", flags);
2746 /* Add -fpmath= option. */
2747 if (fpmath)
2749 opts[num][0] = "-mfpmath=";
2750 switch ((int) fpmath)
2752 case FPMATH_387:
2753 opts[num++][1] = "387";
2754 break;
2756 case FPMATH_SSE:
2757 opts[num++][1] = "sse";
2758 break;
2760 case FPMATH_387 | FPMATH_SSE:
2761 opts[num++][1] = "sse+387";
2762 break;
2764 default:
2765 gcc_unreachable ();
2769 /* Any options? */
2770 if (num == 0)
2771 return NULL;
2773 gcc_assert (num < ARRAY_SIZE (opts));
2775 /* Size the string. */
2776 len = 0;
2777 sep_len = (add_nl_p) ? 3 : 1;
2778 for (i = 0; i < num; i++)
2780 len += sep_len;
2781 for (j = 0; j < 2; j++)
2782 if (opts[i][j])
2783 len += strlen (opts[i][j]);
2786 /* Build the string. */
2787 ret = ptr = (char *) xmalloc (len);
2788 line_len = 0;
2790 for (i = 0; i < num; i++)
2792 size_t len2[2];
2794 for (j = 0; j < 2; j++)
2795 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2797 if (i != 0)
2799 *ptr++ = ' ';
2800 line_len++;
2802 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2804 *ptr++ = '\\';
2805 *ptr++ = '\n';
2806 line_len = 0;
2810 for (j = 0; j < 2; j++)
2811 if (opts[i][j])
2813 memcpy (ptr, opts[i][j], len2[j]);
2814 ptr += len2[j];
2815 line_len += len2[j];
2819 *ptr = '\0';
2820 gcc_assert (ret + len >= ptr);
2822 return ret;
2825 /* Return true if profiling code should be emitted before the
2826 prologue, and false otherwise.
2827 Note: for x86 the "hotfix" (ms_hook_prologue) case is rejected elsewhere with a sorry. */
2828 static bool
2829 ix86_profile_before_prologue (void)
2831 return flag_fentry != 0;
2834 /* Function that is callable from the debugger to print the current
2835 options. */
2836 void ATTRIBUTE_UNUSED
2837 ix86_debug_options (void)
2839 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2840 ix86_arch_string, ix86_tune_string,
2841 ix86_fpmath, true);
2843 if (opts)
2845 fprintf (stderr, "%s\n\n", opts);
2846 free (opts);
2848 else
2849 fputs ("<no options>\n\n", stderr);
2851 return;
2854 static const char *stringop_alg_names[] = {
2855 #define DEF_ENUM
2856 #define DEF_ALG(alg, name) #name,
2857 #include "stringop.def"
2858 #undef DEF_ENUM
2859 #undef DEF_ALG
2862 /* Parse parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
2863 The string is of the following form (or comma separated list of it):
2865 strategy_alg:max_size:[align|noalign]
2867 where the full size range for the strategy is either [0, max_size] or
2868 [min_size, max_size], in which min_size is the max_size + 1 of the
2869 preceding range. The last size range must have max_size == -1.
2871 Examples:
2874 -mmemcpy-strategy=libcall:-1:noalign
2876 this is equivalent to (for known size memcpy) -mstringop-strategy=libcall
2880 -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
2882 This is to tell the compiler to use the following strategy for memset
2883 1) when the expected size is between [1, 16], use rep_8byte strategy;
2884 2) when the size is between [17, 2048], use vector_loop;
2885 3) when the size is > 2048, use libcall. */
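/* Each comma-separated entry of the string is parsed into one
   stringop_size_range ({max, alg, noalign}) by
   ix86_parse_stringop_strategy_string below, which then overrides the
   matching default cost-table algs in place.  */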
2887 struct stringop_size_range
2889 int max;
2890 stringop_alg alg;
2891 bool noalign;
2894 static void
2895 ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
2897 const struct stringop_algs *default_algs;
2898 stringop_size_range input_ranges[MAX_STRINGOP_ALGS];
2899 char *curr_range_str, *next_range_str;
2900 int i = 0, n = 0;
2902 if (is_memset)
2903 default_algs = &ix86_cost->memset[TARGET_64BIT != 0];
2904 else
2905 default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
2907 curr_range_str = strategy_str;
2911 int maxs;
2912 char alg_name[128];
2913 char align[16];
2914 next_range_str = strchr (curr_range_str, ',');
2915 if (next_range_str)
2916 *next_range_str++ = '\0';
2918 if (3 != sscanf (curr_range_str, "%20[^:]:%d:%10s",
2919 alg_name, &maxs, align))
2921 error ("wrong arg %s to option %s", curr_range_str,
2922 is_memset ? "-mmemset-strategy=" : "-mmemcpy-strategy=");
2923 return;
2926 if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1))
2928 error ("size ranges of option %s should be increasing",
2929 is_memset ? "-mmemset-strategy=" : "-mmemcpy-strategy=");
2930 return;
2933 for (i = 0; i < last_alg; i++)
2934 if (!strcmp (alg_name, stringop_alg_names[i]))
2935 break;
2937 if (i == last_alg)
2939 error ("wrong stringop strategy name %s specified for option %s",
2940 alg_name,
2941 is_memset ? "-mmemset-strategy=" : "-mmemcpy-strategy=");
2942 return;
2945 input_ranges[n].max = maxs;
2946 input_ranges[n].alg = (stringop_alg) i;
2947 if (!strcmp (align, "align"))
2948 input_ranges[n].noalign = false;
2949 else if (!strcmp (align, "noalign"))
2950 input_ranges[n].noalign = true;
2951 else
2953 error ("unknown alignment %s specified for option %s",
2954 align, is_memset ? "-mmemset-strategy=" : "-mmemcpy-strategy=");
2955 return;
2957 n++;
2958 curr_range_str = next_range_str;
2960 while (curr_range_str);
2962 if (input_ranges[n - 1].max != -1)
2964 error ("the max value for the last size range should be -1"
2965 " for option %s",
2966 is_memset ? "-mmemset-strategy=" : "-mmemcpy-strategy=");
2967 return;
2970 if (n > MAX_STRINGOP_ALGS)
2972 error ("too many size ranges specified in option %s",
2973 is_memset ? "-mmemset-strategy=" : "-mmemcpy-strategy=");
2974 return;
2977 /* Now override the default algs array. */
2978 for (i = 0; i < n; i++)
2980 *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max;
2981 *const_cast<stringop_alg *>(&default_algs->size[i].alg)
2982 = input_ranges[i].alg;
2983 *const_cast<int *>(&default_algs->size[i].noalign)
2984 = input_ranges[i].noalign;
2989 /* Parse the -mtune-ctrl= option. When DUMP is true,
2990 print the features that are explicitly set. */
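/* For example (the feature names here are illustrative; the real ones come
   from x86-tune.def): "-mtune-ctrl=feature_a,^feature_b" would set
   feature_a and clear feature_b, matching the '^' handling below.  */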
2992 static void
2993 parse_mtune_ctrl_str (bool dump)
2995 if (!ix86_tune_ctrl_string)
2996 return;
2998 char *next_feature_string = NULL;
2999 char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
3000 char *orig = curr_feature_string;
3001 int i;
3004 bool clear = false;
3006 next_feature_string = strchr (curr_feature_string, ',');
3007 if (next_feature_string)
3008 *next_feature_string++ = '\0';
3009 if (*curr_feature_string == '^')
3011 curr_feature_string++;
3012 clear = true;
3014 for (i = 0; i < X86_TUNE_LAST; i++)
3016 if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
3018 ix86_tune_features[i] = !clear;
3019 if (dump)
3020 fprintf (stderr, "Explicitly %s feature %s\n",
3021 clear ? "clear" : "set", ix86_tune_feature_names[i]);
3022 break;
3025 if (i == X86_TUNE_LAST)
3026 error ("Unknown parameter to option -mtune-ctrl: %s",
3027 clear ? curr_feature_string - 1 : curr_feature_string);
3028 curr_feature_string = next_feature_string;
3030 while (curr_feature_string);
3031 free (orig);
3034 /* Helper function to set ix86_tune_features. IX86_TUNE is the
3035 processor type. */
3037 static void
3038 set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
3040 unsigned int ix86_tune_mask = 1u << ix86_tune;
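/* initial_ix86_tune_features[] holds, for each tuning knob, a bitmask of the
   processor types for which it is enabled by default; testing the one-hot
   ix86_tune_mask extracts the default for the selected processor.  */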
3041 int i;
3043 for (i = 0; i < X86_TUNE_LAST; ++i)
3045 if (ix86_tune_no_default)
3046 ix86_tune_features[i] = 0;
3047 else
3048 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3051 if (dump)
3053 fprintf (stderr, "List of x86 specific tuning parameter names:\n");
3054 for (i = 0; i < X86_TUNE_LAST; i++)
3055 fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
3056 ix86_tune_features[i] ? "on" : "off");
3059 parse_mtune_ctrl_str (dump);
3063 /* Override various settings based on options. If MAIN_ARGS_P, the
3064 options are from the command line, otherwise they are from
3065 attributes. */
3067 static void
3068 ix86_option_override_internal (bool main_args_p,
3069 struct gcc_options *opts,
3070 struct gcc_options *opts_set)
3072 int i;
3073 unsigned int ix86_arch_mask;
3074 const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
3075 const char *prefix;
3076 const char *suffix;
3077 const char *sw;
3079 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
3080 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
3081 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
3082 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
3083 #define PTA_AES (HOST_WIDE_INT_1 << 4)
3084 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
3085 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
3086 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
3087 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
3088 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
3089 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
3090 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
3091 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
3092 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
3093 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
3094 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
3095 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
3096 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
3097 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
3098 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
3099 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
3100 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
3101 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
3102 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
3103 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
3104 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
3105 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
3106 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
3107 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
3108 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
3109 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
3110 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
3111 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
3112 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
3113 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
3114 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
3115 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
3116 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
3117 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
3118 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
3119 #define PTA_AVX512F (HOST_WIDE_INT_1 << 40)
3120 #define PTA_AVX512ER (HOST_WIDE_INT_1 << 41)
3121 #define PTA_AVX512PF (HOST_WIDE_INT_1 << 42)
3122 #define PTA_AVX512CD (HOST_WIDE_INT_1 << 43)
3123 #define PTA_SHA (HOST_WIDE_INT_1 << 45)
3124 #define PTA_PREFETCHWT1 (HOST_WIDE_INT_1 << 46)
3125 #define PTA_CLFLUSHOPT (HOST_WIDE_INT_1 << 47)
3126 #define PTA_XSAVEC (HOST_WIDE_INT_1 << 48)
3127 #define PTA_XSAVES (HOST_WIDE_INT_1 << 49)
3128 #define PTA_AVX512DQ (HOST_WIDE_INT_1 << 50)
3129 #define PTA_AVX512BW (HOST_WIDE_INT_1 << 51)
3130 #define PTA_AVX512VL (HOST_WIDE_INT_1 << 52)
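/* Each PTA_* bit above names a single ISA extension; the PTA_<CPU> groups
   below and the processor_alias_table entries OR together the bits that a
   given -march= value implies.  */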
3132 #define PTA_CORE2 \
3133 (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
3134 | PTA_CX16 | PTA_FXSR)
3135 #define PTA_NEHALEM \
3136 (PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT)
3137 #define PTA_WESTMERE \
3138 (PTA_NEHALEM | PTA_AES | PTA_PCLMUL)
3139 #define PTA_SANDYBRIDGE \
3140 (PTA_WESTMERE | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT)
3141 #define PTA_IVYBRIDGE \
3142 (PTA_SANDYBRIDGE | PTA_FSGSBASE | PTA_RDRND | PTA_F16C)
3143 #define PTA_HASWELL \
3144 (PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT \
3145 | PTA_FMA | PTA_MOVBE | PTA_HLE)
3146 #define PTA_BROADWELL \
3147 (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED)
3148 #define PTA_BONNELL \
3149 (PTA_CORE2 | PTA_MOVBE)
3150 #define PTA_SILVERMONT \
3151 (PTA_WESTMERE | PTA_MOVBE)
3153 /* If this reaches 64, we need to widen the struct pta flags below.  */
3155 static struct pta
3157 const char *const name; /* processor name or nickname. */
3158 const enum processor_type processor;
3159 const enum attr_cpu schedule;
3160 const unsigned HOST_WIDE_INT flags;
3162 const processor_alias_table[] =
3164 {"i386", PROCESSOR_I386, CPU_NONE, 0},
3165 {"i486", PROCESSOR_I486, CPU_NONE, 0},
3166 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3167 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3168 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3169 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3170 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3171 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3172 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3173 PTA_MMX | PTA_SSE | PTA_FXSR},
3174 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3175 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3176 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
3177 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3178 PTA_MMX | PTA_SSE | PTA_FXSR},
3179 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3180 PTA_MMX | PTA_SSE | PTA_FXSR},
3181 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3182 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3183 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3184 PTA_MMX |PTA_SSE | PTA_SSE2 | PTA_FXSR},
3185 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3186 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3187 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3188 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3189 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3190 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3191 | PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
3192 {"core2", PROCESSOR_CORE2, CPU_CORE2, PTA_CORE2},
3193 {"nehalem", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3194 {"corei7", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3195 {"westmere", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_WESTMERE},
3196 {"sandybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3197 PTA_SANDYBRIDGE},
3198 {"corei7-avx", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3199 PTA_SANDYBRIDGE},
3200 {"ivybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3201 PTA_IVYBRIDGE},
3202 {"core-avx-i", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3203 PTA_IVYBRIDGE},
3204 {"haswell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3205 {"core-avx2", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3206 {"broadwell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_BROADWELL},
3207 {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3208 {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3209 {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3210 {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3211 {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
3212 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3213 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3214 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3215 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3216 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3217 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3218 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3219 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3220 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3221 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3222 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3223 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3224 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3225 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3226 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3227 {"x86-64", PROCESSOR_K8, CPU_K8,
3228 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3229 {"k8", PROCESSOR_K8, CPU_K8,
3230 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3231 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3232 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3233 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3234 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3235 {"opteron", PROCESSOR_K8, CPU_K8,
3236 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3237 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3238 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3239 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3240 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3241 {"athlon64", PROCESSOR_K8, CPU_K8,
3242 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3243 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3244 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3245 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3246 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3247 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3248 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3249 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3250 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3251 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3252 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3253 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3254 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3255 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3256 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3257 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3258 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3259 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3260 | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3261 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3262 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3263 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3264 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3265 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3266 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3267 {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
3268 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3269 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3270 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3271 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3272 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
3273 | PTA_XSAVEOPT | PTA_FSGSBASE},
3274 {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
3275 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3276 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3277 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
3278 | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
3279 | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
3280 | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND
3281 | PTA_MOVBE},
3282 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
3283 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3284 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_PRFCHW
3285 | PTA_FXSR | PTA_XSAVE},
3286 {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
3287 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3288 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_SSE4_1
3289 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3290 | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
3291 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
3293 {"generic", PROCESSOR_GENERIC, CPU_GENERIC,
3294 PTA_64BIT
3295 | PTA_HLE /* flags are only used for -march switch. */ },
3298 /* -mrecip options. */
3299 static struct
3301 const char *string; /* option name */
3302 unsigned int mask; /* mask bits to set */
3304 const recip_options[] =
3306 { "all", RECIP_MASK_ALL },
3307 { "none", RECIP_MASK_NONE },
3308 { "div", RECIP_MASK_DIV },
3309 { "sqrt", RECIP_MASK_SQRT },
3310 { "vec-div", RECIP_MASK_VEC_DIV },
3311 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
3314 int const pta_size = ARRAY_SIZE (processor_alias_table);
3316 /* Set up prefix/suffix so the error messages refer to either the command
3317 line argument, or the attribute(target). */
3318 if (main_args_p)
3320 prefix = "-m";
3321 suffix = "";
3322 sw = "switch";
3324 else
3326 prefix = "option(\"";
3327 suffix = "\")";
3328 sw = "attribute";
3331 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3332 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3333 if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3334 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
3335 #ifdef TARGET_BI_ARCH
3336 else
3338 #if TARGET_BI_ARCH == 1
3339 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3340 is on and OPTION_MASK_ABI_X32 is off. We turn off
3341 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3342 -mx32. */
3343 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3344 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3345 #else
3346 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3347 on and OPTION_MASK_ABI_64 is off. We turn off
3348 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3349 -m64. */
3350 if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3351 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3352 #endif
3354 #endif
3356 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3358 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3359 OPTION_MASK_ABI_64 for TARGET_X32. */
3360 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3361 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3363 else if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
3364 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT
3365 | OPTION_MASK_ABI_X32
3366 | OPTION_MASK_ABI_64);
3367 else if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3369 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3370 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3371 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3372 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3375 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3376 SUBTARGET_OVERRIDE_OPTIONS;
3377 #endif
3379 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3380 SUBSUBTARGET_OVERRIDE_OPTIONS;
3381 #endif
3383 /* -fPIC is the default for x86_64. */
3384 if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags))
3385 opts->x_flag_pic = 2;
3387 /* Need to check -mtune=generic first. */
3388 if (opts->x_ix86_tune_string)
3390 /* As special support for cross compilers we read -mtune=native
3391 as -mtune=generic. With native compilers we won't see the
3392 -mtune=native, as it was changed by the driver. */
3393 if (!strcmp (opts->x_ix86_tune_string, "native"))
3395 opts->x_ix86_tune_string = "generic";
3397 else if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3398 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3399 "%stune=k8%s or %stune=generic%s instead as appropriate",
3400 prefix, suffix, prefix, suffix, prefix, suffix);
3402 else
3404 if (opts->x_ix86_arch_string)
3405 opts->x_ix86_tune_string = opts->x_ix86_arch_string;
3406 if (!opts->x_ix86_tune_string)
3408 opts->x_ix86_tune_string
3409 = processor_target_table[TARGET_CPU_DEFAULT].name;
3410 ix86_tune_defaulted = 1;
3413 /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
3414 or defaulted. We need to use a sensible tune option. */
3415 if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3417 opts->x_ix86_tune_string = "generic";
3421 if (opts->x_ix86_stringop_alg == rep_prefix_8_byte
3422 && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3424 /* rep; movq isn't available in 32-bit code. */
3425 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3426 opts->x_ix86_stringop_alg = no_stringop;
3429 if (!opts->x_ix86_arch_string)
3430 opts->x_ix86_arch_string
3431 = TARGET_64BIT_P (opts->x_ix86_isa_flags)
3432 ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3433 else
3434 ix86_arch_specified = 1;
3436 if (opts_set->x_ix86_pmode)
3438 if ((TARGET_LP64_P (opts->x_ix86_isa_flags)
3439 && opts->x_ix86_pmode == PMODE_SI)
3440 || (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
3441 && opts->x_ix86_pmode == PMODE_DI))
3442 error ("address mode %qs not supported in the %s bit mode",
3443 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long",
3444 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32");
3446 else
3447 opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags)
3448 ? PMODE_DI : PMODE_SI;
3450 if (!opts_set->x_ix86_abi)
3451 opts->x_ix86_abi = DEFAULT_ABI;
3453 /* For targets using the MS ABI, enable ms-extensions if not
3454 explicitly turned off. For non-MS ABI targets we turn this
3455 option off. */
3456 if (!opts_set->x_flag_ms_extensions)
3457 opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI);
3459 if (opts_set->x_ix86_cmodel)
3461 switch (opts->x_ix86_cmodel)
3463 case CM_SMALL:
3464 case CM_SMALL_PIC:
3465 if (opts->x_flag_pic)
3466 opts->x_ix86_cmodel = CM_SMALL_PIC;
3467 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3468 error ("code model %qs not supported in the %s bit mode",
3469 "small", "32");
3470 break;
3472 case CM_MEDIUM:
3473 case CM_MEDIUM_PIC:
3474 if (opts->x_flag_pic)
3475 opts->x_ix86_cmodel = CM_MEDIUM_PIC;
3476 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3477 error ("code model %qs not supported in the %s bit mode",
3478 "medium", "32");
3479 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3480 error ("code model %qs not supported in x32 mode",
3481 "medium");
3482 break;
3484 case CM_LARGE:
3485 case CM_LARGE_PIC:
3486 if (opts->x_flag_pic)
3487 opts->x_ix86_cmodel = CM_LARGE_PIC;
3488 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3489 error ("code model %qs not supported in the %s bit mode",
3490 "large", "32");
3491 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3492 error ("code model %qs not supported in x32 mode",
3493 "large");
3494 break;
3496 case CM_32:
3497 if (opts->x_flag_pic)
3498 error ("code model %s does not support PIC mode", "32");
3499 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3500 error ("code model %qs not supported in the %s bit mode",
3501 "32", "64");
3502 break;
3504 case CM_KERNEL:
3505 if (opts->x_flag_pic)
3507 error ("code model %s does not support PIC mode", "kernel");
3508 opts->x_ix86_cmodel = CM_32;
3510 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3511 error ("code model %qs not supported in the %s bit mode",
3512 "kernel", "32");
3513 break;
3515 default:
3516 gcc_unreachable ();
3519 else
3521 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3522 use of rip-relative addressing. This eliminates fixups that
3523 would otherwise be needed if this object is to be placed in a
3524 DLL, and is essentially just as efficient as direct addressing. */
3525 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3526 && (TARGET_RDOS || TARGET_PECOFF))
3527 opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1;
3528 else if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3529 opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL;
3530 else
3531 opts->x_ix86_cmodel = CM_32;
3533 if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL)
3535 error ("-masm=intel not supported in this configuration");
3536 opts->x_ix86_asm_dialect = ASM_ATT;
3538 if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0)
3539 != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3540 sorry ("%i-bit mode not compiled in",
3541 (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3543 for (i = 0; i < pta_size; i++)
3544 if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name))
3546 ix86_schedule = processor_alias_table[i].schedule;
3547 ix86_arch = processor_alias_table[i].processor;
3548 /* Default cpu tuning to the architecture. */
3549 ix86_tune = ix86_arch;
3551 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3552 && !(processor_alias_table[i].flags & PTA_64BIT))
3553 error ("CPU you selected does not support x86-64 "
3554 "instruction set");
3556 if (processor_alias_table[i].flags & PTA_MMX
3557 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3558 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3559 if (processor_alias_table[i].flags & PTA_3DNOW
3560 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3561 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3562 if (processor_alias_table[i].flags & PTA_3DNOW_A
3563 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3564 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3565 if (processor_alias_table[i].flags & PTA_SSE
3566 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3567 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3568 if (processor_alias_table[i].flags & PTA_SSE2
3569 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3570 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3571 if (processor_alias_table[i].flags & PTA_SSE3
3572 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3573 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3574 if (processor_alias_table[i].flags & PTA_SSSE3
3575 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3576 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3577 if (processor_alias_table[i].flags & PTA_SSE4_1
3578 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3579 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3580 if (processor_alias_table[i].flags & PTA_SSE4_2
3581 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3582 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3583 if (processor_alias_table[i].flags & PTA_AVX
3584 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3585 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3586 if (processor_alias_table[i].flags & PTA_AVX2
3587 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3588 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3589 if (processor_alias_table[i].flags & PTA_FMA
3590 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3591 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3592 if (processor_alias_table[i].flags & PTA_SSE4A
3593 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3594 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3595 if (processor_alias_table[i].flags & PTA_FMA4
3596 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3597 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3598 if (processor_alias_table[i].flags & PTA_XOP
3599 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3600 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3601 if (processor_alias_table[i].flags & PTA_LWP
3602 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3603 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3604 if (processor_alias_table[i].flags & PTA_ABM
3605 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3606 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3607 if (processor_alias_table[i].flags & PTA_BMI
3608 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3609 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3610 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3611 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3612 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3613 if (processor_alias_table[i].flags & PTA_TBM
3614 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3615 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3616 if (processor_alias_table[i].flags & PTA_BMI2
3617 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
3618 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
3619 if (processor_alias_table[i].flags & PTA_CX16
3620 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3621 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3622 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3623 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3624 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3625 if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags)
3626 && (processor_alias_table[i].flags & PTA_NO_SAHF))
3627 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3628 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3629 if (processor_alias_table[i].flags & PTA_MOVBE
3630 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3631 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3632 if (processor_alias_table[i].flags & PTA_AES
3633 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3634 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AES;
3635 if (processor_alias_table[i].flags & PTA_SHA
3636 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
3637 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SHA;
3638 if (processor_alias_table[i].flags & PTA_PCLMUL
3639 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3640 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3641 if (processor_alias_table[i].flags & PTA_FSGSBASE
3642 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3643 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3644 if (processor_alias_table[i].flags & PTA_RDRND
3645 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3646 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3647 if (processor_alias_table[i].flags & PTA_F16C
3648 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3649 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3650 if (processor_alias_table[i].flags & PTA_RTM
3651 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
3652 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM;
3653 if (processor_alias_table[i].flags & PTA_HLE
3654 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
3655 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_HLE;
3656 if (processor_alias_table[i].flags & PTA_PRFCHW
3657 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
3658 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
3659 if (processor_alias_table[i].flags & PTA_RDSEED
3660 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
3661 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
3662 if (processor_alias_table[i].flags & PTA_ADX
3663 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
3664 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX;
3665 if (processor_alias_table[i].flags & PTA_FXSR
3666 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
3667 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
3668 if (processor_alias_table[i].flags & PTA_XSAVE
3669 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
3670 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
3671 if (processor_alias_table[i].flags & PTA_XSAVEOPT
3672 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
3673 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
3674 if (processor_alias_table[i].flags & PTA_AVX512F
3675 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
3676 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
3677 if (processor_alias_table[i].flags & PTA_AVX512ER
3678 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
3679 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
3680 if (processor_alias_table[i].flags & PTA_AVX512PF
3681 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
3682 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
3683 if (processor_alias_table[i].flags & PTA_AVX512CD
3684 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
3685 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
3686 if (processor_alias_table[i].flags & PTA_PREFETCHWT1
3687 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1))
3688 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1;
3689 if (processor_alias_table[i].flags & PTA_CLFLUSHOPT
3690 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLFLUSHOPT))
3691 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLFLUSHOPT;
3692 if (processor_alias_table[i].flags & PTA_XSAVEC
3693 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEC))
3694 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEC;
3695 if (processor_alias_table[i].flags & PTA_XSAVES
3696 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVES))
3697 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVES;
3698 if (processor_alias_table[i].flags & PTA_AVX512DQ
3699 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512DQ))
3700 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512DQ;
3701 if (processor_alias_table[i].flags & PTA_AVX512BW
3702 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512BW))
3703 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512BW;
3704 if (processor_alias_table[i].flags & PTA_AVX512VL
3705 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VL))
3706 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VL;
3707 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3708 x86_prefetch_sse = true;
3710 break;
3713 if (!strcmp (opts->x_ix86_arch_string, "generic"))
3714 error ("generic CPU can be used only for %stune=%s %s",
3715 prefix, suffix, sw);
3716 else if (!strcmp (opts->x_ix86_arch_string, "intel"))
3717 error ("intel CPU can be used only for %stune=%s %s",
3718 prefix, suffix, sw);
3719 else if (i == pta_size)
3720 error ("bad value (%s) for %sarch=%s %s",
3721 opts->x_ix86_arch_string, prefix, suffix, sw);
3723 ix86_arch_mask = 1u << ix86_arch;
3724 for (i = 0; i < X86_ARCH_LAST; ++i)
3725 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3727 for (i = 0; i < pta_size; i++)
3728 if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name))
3730 ix86_schedule = processor_alias_table[i].schedule;
3731 ix86_tune = processor_alias_table[i].processor;
3732 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3734 if (!(processor_alias_table[i].flags & PTA_64BIT))
3736 if (ix86_tune_defaulted)
3738 opts->x_ix86_tune_string = "x86-64";
3739 for (i = 0; i < pta_size; i++)
3740 if (! strcmp (opts->x_ix86_tune_string,
3741 processor_alias_table[i].name))
3742 break;
3743 ix86_schedule = processor_alias_table[i].schedule;
3744 ix86_tune = processor_alias_table[i].processor;
3746 else
3747 error ("CPU you selected does not support x86-64 "
3748 "instruction set");
3751 /* Intel CPUs have always interpreted SSE prefetch instructions as
3752 NOPs; so, we can enable SSE prefetch instructions even when
3753 -mtune (rather than -march) points us to a processor that has them.
3754 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3755 higher processors. */
3756 if (TARGET_CMOV
3757 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3758 x86_prefetch_sse = true;
3759 break;
3762 if (ix86_tune_specified && i == pta_size)
3763 error ("bad value (%s) for %stune=%s %s",
3764 opts->x_ix86_tune_string, prefix, suffix, sw);
3766 set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes);
3768 #ifndef USE_IX86_FRAME_POINTER
3769 #define USE_IX86_FRAME_POINTER 0
3770 #endif
3772 #ifndef USE_X86_64_FRAME_POINTER
3773 #define USE_X86_64_FRAME_POINTER 0
3774 #endif
3776 /* Set the default values for switches whose default depends on TARGET_64BIT
3777 in case they weren't overwritten by command line options. */
3778 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3780 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3781 opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3782 if (opts->x_flag_asynchronous_unwind_tables
3783 && !opts_set->x_flag_unwind_tables
3784 && TARGET_64BIT_MS_ABI)
3785 opts->x_flag_unwind_tables = 1;
3786 if (opts->x_flag_asynchronous_unwind_tables == 2)
3787 opts->x_flag_unwind_tables
3788 = opts->x_flag_asynchronous_unwind_tables = 1;
3789 if (opts->x_flag_pcc_struct_return == 2)
3790 opts->x_flag_pcc_struct_return = 0;
3792 else
3794 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3795 opts->x_flag_omit_frame_pointer
3796 = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size);
3797 if (opts->x_flag_asynchronous_unwind_tables == 2)
3798 opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3799 if (opts->x_flag_pcc_struct_return == 2)
3800 opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3803 ix86_tune_cost = processor_target_table[ix86_tune].cost;
3804 if (opts->x_optimize_size)
3805 ix86_cost = &ix86_size_cost;
3806 else
3807 ix86_cost = ix86_tune_cost;
3809 /* Arrange to set up i386_stack_locals for all functions. */
3810 init_machine_status = ix86_init_machine_status;
3812 /* Validate -mregparm= value. */
3813 if (opts_set->x_ix86_regparm)
3815 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3816 warning (0, "-mregparm is ignored in 64-bit mode");
3817 if (opts->x_ix86_regparm > REGPARM_MAX)
3819 error ("-mregparm=%d is not between 0 and %d",
3820 opts->x_ix86_regparm, REGPARM_MAX);
3821 opts->x_ix86_regparm = 0;
3824 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3825 opts->x_ix86_regparm = REGPARM_MAX;
3827 /* Default align_* from the processor table. */
3828 if (opts->x_align_loops == 0)
3830 opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
3831 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3833 if (opts->x_align_jumps == 0)
3835 opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
3836 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3838 if (opts->x_align_functions == 0)
3840 opts->x_align_functions = processor_target_table[ix86_tune].align_func;
3843 /* Provide default for -mbranch-cost= value. */
3844 if (!opts_set->x_ix86_branch_cost)
3845 opts->x_ix86_branch_cost = ix86_cost->branch_cost;
3847 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3849 opts->x_target_flags
3850 |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
3852 /* Enable by default the SSE and MMX builtins. Do allow the user to
3853 explicitly disable any of these. In particular, disabling SSE and
3854 MMX for kernel code is extremely useful. */
3855 if (!ix86_arch_specified)
3856 opts->x_ix86_isa_flags
3857 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3858 | TARGET_SUBTARGET64_ISA_DEFAULT)
3859 & ~opts->x_ix86_isa_flags_explicit);
3861 if (TARGET_RTD_P (opts->x_target_flags))
3862 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3864 else
3866 opts->x_target_flags
3867 |= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags;
3869 if (!ix86_arch_specified)
3870 opts->x_ix86_isa_flags
3871 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
3873 /* The i386 ABI does not specify a red zone. It still makes sense to use it
3874 when the programmer takes care to keep the stack from being destroyed. */
3875 if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE))
3876 opts->x_target_flags |= MASK_NO_RED_ZONE;
3879 /* Keep nonleaf frame pointers. */
3880 if (opts->x_flag_omit_frame_pointer)
3881 opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3882 else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
3883 opts->x_flag_omit_frame_pointer = 1;
3885 /* If we're doing fast math, we don't care about comparison order
3886 wrt NaNs. This lets us use a shorter comparison sequence. */
3887 if (opts->x_flag_finite_math_only)
3888 opts->x_target_flags &= ~MASK_IEEE_FP;
3890 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3891 since the insns won't need emulation. */
3892 if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387])
3893 opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387;
3895 /* Likewise, if the target doesn't have a 387, or we've specified
3896 software floating point, don't use 387 inline intrinsics. */
3897 if (!TARGET_80387_P (opts->x_target_flags))
3898 opts->x_target_flags |= MASK_NO_FANCY_MATH_387;
3900 /* Turn on MMX builtins for -msse. */
3901 if (TARGET_SSE_P (opts->x_ix86_isa_flags))
3902 opts->x_ix86_isa_flags
3903 |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
3905 /* Enable SSE prefetch. */
3906 if (TARGET_SSE_P (opts->x_ix86_isa_flags)
3907 || (TARGET_PRFCHW && !TARGET_3DNOW_P (opts->x_ix86_isa_flags)))
3908 x86_prefetch_sse = true;
3910 /* Enable prefetch{,w} instructions for -m3dnow and -mprefetchwt1. */
3911 if (TARGET_3DNOW_P (opts->x_ix86_isa_flags)
3912 || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags))
3913 opts->x_ix86_isa_flags
3914 |= OPTION_MASK_ISA_PRFCHW & ~opts->x_ix86_isa_flags_explicit;
3916 /* Enable popcnt instruction for -msse4.2 or -mabm. */
3917 if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)
3918 || TARGET_ABM_P (opts->x_ix86_isa_flags))
3919 opts->x_ix86_isa_flags
3920 |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit;
3922 /* Enable lzcnt instruction for -mabm. */
3923 if (TARGET_ABM_P(opts->x_ix86_isa_flags))
3924 opts->x_ix86_isa_flags
3925 |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit;
3927 /* Validate -mpreferred-stack-boundary= value or default it to
3928 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3929 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3930 if (opts_set->x_ix86_preferred_stack_boundary_arg)
3932 int min = (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3933 ? (TARGET_SSE_P (opts->x_ix86_isa_flags) ? 4 : 3) : 2);
3934 int max = (TARGET_SEH ? 4 : 12);
3936 if (opts->x_ix86_preferred_stack_boundary_arg < min
3937 || opts->x_ix86_preferred_stack_boundary_arg > max)
3939 if (min == max)
3940 error ("-mpreferred-stack-boundary is not supported "
3941 "for this target");
3942 else
3943 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
3944 opts->x_ix86_preferred_stack_boundary_arg, min, max);
3946 else
3947 ix86_preferred_stack_boundary
3948 = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
3951 /* Set the default value for -mstackrealign. */
3952 if (opts->x_ix86_force_align_arg_pointer == -1)
3953 opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3955 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3957 /* Validate -mincoming-stack-boundary= value or default it to
3958 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3959 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3960 if (opts_set->x_ix86_incoming_stack_boundary_arg)
3962 if (opts->x_ix86_incoming_stack_boundary_arg
3963 < (TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2)
3964 || opts->x_ix86_incoming_stack_boundary_arg > 12)
3965 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3966 opts->x_ix86_incoming_stack_boundary_arg,
3967 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2);
3968 else
3970 ix86_user_incoming_stack_boundary
3971 = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
3972 ix86_incoming_stack_boundary
3973 = ix86_user_incoming_stack_boundary;
3977 /* Accept -msseregparm only if at least SSE support is enabled. */
3978 if (TARGET_SSEREGPARM_P (opts->x_target_flags)
3979 && ! TARGET_SSE_P (opts->x_ix86_isa_flags))
3980 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3982 if (opts_set->x_ix86_fpmath)
3984 if (opts->x_ix86_fpmath & FPMATH_SSE)
3986 if (!TARGET_SSE_P (opts->x_ix86_isa_flags))
3988 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3989 opts->x_ix86_fpmath = FPMATH_387;
3991 else if ((opts->x_ix86_fpmath & FPMATH_387)
3992 && !TARGET_80387_P (opts->x_target_flags))
3994 warning (0, "387 instruction set disabled, using SSE arithmetics");
3995 opts->x_ix86_fpmath = FPMATH_SSE;
3999 /* For all chips supporting SSE2, -mfpmath=sse performs better than
4000 -mfpmath=387. The latter is nevertheless the default on many targets,
4001 since the extra 80-bit precision of temporaries is considered part of the ABI.
4002 Overwrite the default at least for -ffast-math.
4003 TODO: -mfpmath=both seems to produce similarly performing code with slightly
4004 smaller binaries. It is, however, not clear whether register allocation is
4005 ready for this setting.
4006 Also, -mfpmath=387 codegen is overall a lot more compact (about 4-5%) than SSE
4007 codegen. We may switch to 387 with -ffast-math for size-optimized
4008 functions. */
4009 else if (fast_math_flags_set_p (&global_options)
4010 && TARGET_SSE2_P (opts->x_ix86_isa_flags))
4011 opts->x_ix86_fpmath = FPMATH_SSE;
4012 else
4013 opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags);
4015 /* If the i387 is disabled, then do not return values in it. */
4016 if (!TARGET_80387_P (opts->x_target_flags))
4017 opts->x_target_flags &= ~MASK_FLOAT_RETURNS;
4019 /* Use external vectorized library in vectorizing intrinsics. */
4020 if (opts_set->x_ix86_veclibabi_type)
4021 switch (opts->x_ix86_veclibabi_type)
4023 case ix86_veclibabi_type_svml:
4024 ix86_veclib_handler = ix86_veclibabi_svml;
4025 break;
4027 case ix86_veclibabi_type_acml:
4028 ix86_veclib_handler = ix86_veclibabi_acml;
4029 break;
4031 default:
4032 gcc_unreachable ();
4035 if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS]
4036 && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4037 && !opts->x_optimize_size)
4038 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4040 /* If stack probes are required, the space used for large function
4041 arguments on the stack must also be probed, so enable
4042 -maccumulate-outgoing-args so this happens in the prologue. */
4043 if (TARGET_STACK_PROBE_P (opts->x_target_flags)
4044 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4046 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4047 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
4048 "for correctness", prefix, suffix);
4049 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4052 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
4054 char *p;
4055 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
4056 p = strchr (internal_label_prefix, 'X');
4057 internal_label_prefix_len = p - internal_label_prefix;
4058 *p = '\0';
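/* On typical ELF configurations ASM_GENERATE_INTERNAL_LABEL produces
   something like "*.LX0" here, so the computed prefix is "*.L"; the exact
   string is target-dependent.  */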
4061 /* When no scheduling description is available, disable the scheduler pass
4062 so that it won't slow down compilation and make x87 code slower. */
4063 if (!TARGET_SCHEDULE)
4064 opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0;
4066 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4067 ix86_tune_cost->simultaneous_prefetches,
4068 opts->x_param_values,
4069 opts_set->x_param_values);
4070 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4071 ix86_tune_cost->prefetch_block,
4072 opts->x_param_values,
4073 opts_set->x_param_values);
4074 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
4075 ix86_tune_cost->l1_cache_size,
4076 opts->x_param_values,
4077 opts_set->x_param_values);
4078 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
4079 ix86_tune_cost->l2_cache_size,
4080 opts->x_param_values,
4081 opts_set->x_param_values);
4083 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
4084 if (opts->x_flag_prefetch_loop_arrays < 0
4085 && HAVE_prefetch
4086 && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
4087 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
4088 opts->x_flag_prefetch_loop_arrays = 1;
4090 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
4091 can be optimized to ap = __builtin_next_arg (0). */
4092 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack)
4093 targetm.expand_builtin_va_start = NULL;
4095 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4097 ix86_gen_leave = gen_leave_rex64;
4098 if (Pmode == DImode)
4100 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
4101 ix86_gen_tls_local_dynamic_base_64
4102 = gen_tls_local_dynamic_base_64_di;
4104 else
4106 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
4107 ix86_gen_tls_local_dynamic_base_64
4108 = gen_tls_local_dynamic_base_64_si;
4111 else
4112 ix86_gen_leave = gen_leave;
4114 if (Pmode == DImode)
4116 ix86_gen_add3 = gen_adddi3;
4117 ix86_gen_sub3 = gen_subdi3;
4118 ix86_gen_sub3_carry = gen_subdi3_carry;
4119 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
4120 ix86_gen_andsp = gen_anddi3;
4121 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
4122 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
4123 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
4124 ix86_gen_monitor = gen_sse3_monitor_di;
4126 else
4128 ix86_gen_add3 = gen_addsi3;
4129 ix86_gen_sub3 = gen_subsi3;
4130 ix86_gen_sub3_carry = gen_subsi3_carry;
4131 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
4132 ix86_gen_andsp = gen_andsi3;
4133 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
4134 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
4135 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
4136 ix86_gen_monitor = gen_sse3_monitor_si;
4139 #ifdef USE_IX86_CLD
4140 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4141 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4142 opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags;
4143 #endif
4145 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic)
4147 if (opts->x_flag_fentry > 0)
4148 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4149 "with -fpic");
4150 opts->x_flag_fentry = 0;
4152 else if (TARGET_SEH)
4154 if (opts->x_flag_fentry == 0)
4155 sorry ("-mno-fentry isn%'t compatible with SEH");
4156 opts->x_flag_fentry = 1;
4158 else if (opts->x_flag_fentry < 0)
4160 #if defined(PROFILE_BEFORE_PROLOGUE)
4161 opts->x_flag_fentry = 1;
4162 #else
4163 opts->x_flag_fentry = 0;
4164 #endif
4167 /* When not optimizing for size, enable vzeroupper optimization for
4168 TARGET_AVX with -fexpensive-optimizations and split 32-byte
4169 AVX unaligned loads/stores. */
4170 if (!opts->x_optimize_size)
4172 if (flag_expensive_optimizations
4173 && !(opts_set->x_target_flags & MASK_VZEROUPPER))
4174 opts->x_target_flags |= MASK_VZEROUPPER;
4175 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
4176 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
4177 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
4178 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
4179 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
4180 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
4181 /* Enable 128-bit AVX instruction generation
4182 for the auto-vectorizer. */
4183 if (TARGET_AVX128_OPTIMAL
4184 && !(opts_set->x_target_flags & MASK_PREFER_AVX128))
4185 opts->x_target_flags |= MASK_PREFER_AVX128;
4188 if (opts->x_ix86_recip_name)
4190 char *p = ASTRDUP (opts->x_ix86_recip_name);
4191 char *q;
4192 unsigned int mask, i;
4193 bool invert;
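/* Example: -mrecip=all,!sqrt enables every reciprocal approximation except
   the square-root one; a leading '!' inverts the named mask (see
   recip_options above).  */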
4195 while ((q = strtok (p, ",")) != NULL)
4197 p = NULL;
4198 if (*q == '!')
4200 invert = true;
4201 q++;
4203 else
4204 invert = false;
4206 if (!strcmp (q, "default"))
4207 mask = RECIP_MASK_ALL;
4208 else
4210 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4211 if (!strcmp (q, recip_options[i].string))
4213 mask = recip_options[i].mask;
4214 break;
4217 if (i == ARRAY_SIZE (recip_options))
4219 error ("unknown option for -mrecip=%s", q);
4220 invert = false;
4221 mask = RECIP_MASK_NONE;
4225 opts->x_recip_mask_explicit |= mask;
4226 if (invert)
4227 opts->x_recip_mask &= ~mask;
4228 else
4229 opts->x_recip_mask |= mask;
4233 if (TARGET_RECIP_P (opts->x_target_flags))
4234 opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit;
4235 else if (opts_set->x_target_flags & MASK_RECIP)
4236 opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit);
4238 /* Default long double to 64-bit for 32-bit Bionic and to __float128
4239 for 64-bit Bionic. */
4240 if (TARGET_HAS_BIONIC
4241 && !(opts_set->x_target_flags
4242 & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128)))
4243 opts->x_target_flags |= (TARGET_64BIT
4244 ? MASK_LONG_DOUBLE_128
4245 : MASK_LONG_DOUBLE_64);
4247 /* Only one of them can be active. */
4248 gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0
4249 || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0);
4251 /* Save the initial options in case the user does function specific
4252 options. */
4253 if (main_args_p)
4254 target_option_default_node = target_option_current_node
4255 = build_target_option_node (opts);
4257 /* Handle stack protector */
4258 if (!opts_set->x_ix86_stack_protector_guard)
4259 opts->x_ix86_stack_protector_guard
4260 = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
4262 /* Handle -mmemcpy-strategy= and -mmemset-strategy= */
4263 if (opts->x_ix86_tune_memcpy_strategy)
4265 char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy);
4266 ix86_parse_stringop_strategy_string (str, false);
4267 free (str);
4270 if (opts->x_ix86_tune_memset_strategy)
4272 char *str = xstrdup (opts->x_ix86_tune_memset_strategy);
4273 ix86_parse_stringop_strategy_string (str, true);
4274 free (str);
4278 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4280 static void
4281 ix86_option_override (void)
4283 opt_pass *pass_insert_vzeroupper = make_pass_insert_vzeroupper (g);
4284 static struct register_pass_info insert_vzeroupper_info
4285 = { pass_insert_vzeroupper, "reload",
4286 1, PASS_POS_INSERT_AFTER
4289 ix86_option_override_internal (true, &global_options, &global_options_set);
4292 /* This needs to be done at start up. It's convenient to do it here. */
4293 register_pass (&insert_vzeroupper_info);
4296 /* Update register usage after having seen the compiler flags. */
4298 static void
4299 ix86_conditional_register_usage (void)
4301 int i, c_mask;
4302 unsigned int j;
4304 /* The PIC register, if it exists, is fixed. */
4305 j = PIC_OFFSET_TABLE_REGNUM;
4306 if (j != INVALID_REGNUM)
4307 fixed_regs[j] = call_used_regs[j] = 1;
4309 /* For 32-bit targets, squash the REX registers. */
4310 if (! TARGET_64BIT)
4312 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4313 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4314 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4315 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4316 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4317 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4320 /* See the definition of CALL_USED_REGISTERS in i386.h. */
4321 c_mask = (TARGET_64BIT_MS_ABI ? (1 << 3)
4322 : TARGET_64BIT ? (1 << 2)
4323 : (1 << 1));
4325 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4327 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4329 /* Set/reset conditionally defined registers from
4330 CALL_USED_REGISTERS initializer. */
4331 if (call_used_regs[i] > 1)
4332 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
4334 /* Calculate registers of CLOBBERED_REGS register set
4335 as call used registers from GENERAL_REGS register set. */
4336 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4337 && call_used_regs[i])
4338 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
4341 /* If MMX is disabled, squash the registers. */
4342 if (! TARGET_MMX)
4343 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4344 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4345 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4347 /* If SSE is disabled, squash the registers. */
4348 if (! TARGET_SSE)
4349 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4350 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4351 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4353 /* If the FPU is disabled, squash the registers. */
4354 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4355 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4356 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4357 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4359 /* If AVX512F is disabled, squash the registers. */
4360 if (! TARGET_AVX512F)
4362 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4363 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4365 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
4366 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4371 /* Save the current options */
4373 static void
4374 ix86_function_specific_save (struct cl_target_option *ptr,
4375 struct gcc_options *opts)
4377 ptr->arch = ix86_arch;
4378 ptr->schedule = ix86_schedule;
4379 ptr->tune = ix86_tune;
4380 ptr->branch_cost = ix86_branch_cost;
4381 ptr->tune_defaulted = ix86_tune_defaulted;
4382 ptr->arch_specified = ix86_arch_specified;
4383 ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
4384 ptr->x_ix86_target_flags_explicit = opts->x_ix86_target_flags_explicit;
4385 ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
4386 ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
4387 ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
4388 ptr->x_ix86_cmodel = opts->x_ix86_cmodel;
4389 ptr->x_ix86_abi = opts->x_ix86_abi;
4390 ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect;
4391 ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost;
4392 ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes;
4393 ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer;
4394 ptr->x_ix86_force_drap = opts->x_ix86_force_drap;
4395 ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg;
4396 ptr->x_ix86_pmode = opts->x_ix86_pmode;
4397 ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg;
4398 ptr->x_ix86_recip_name = opts->x_ix86_recip_name;
4399 ptr->x_ix86_regparm = opts->x_ix86_regparm;
4400 ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold;
4401 ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx;
4402 ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard;
4403 ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg;
4404 ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect;
4405 ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string;
4406 ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy;
4407 ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy;
4408 ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default;
4409 ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type;
4411 /* The fields are char but the variables are not; make sure the
4412 values fit in the fields. */
4413 gcc_assert (ptr->arch == ix86_arch);
4414 gcc_assert (ptr->schedule == ix86_schedule);
4415 gcc_assert (ptr->tune == ix86_tune);
4416 gcc_assert (ptr->branch_cost == ix86_branch_cost);
4419 /* Restore the current options */
4421 static void
4422 ix86_function_specific_restore (struct gcc_options *opts,
4423 struct cl_target_option *ptr)
4425 enum processor_type old_tune = ix86_tune;
4426 enum processor_type old_arch = ix86_arch;
4427 unsigned int ix86_arch_mask;
4428 int i;
4430 /* We don't change -fPIC. */
4431 opts->x_flag_pic = flag_pic;
4433 ix86_arch = (enum processor_type) ptr->arch;
4434 ix86_schedule = (enum attr_cpu) ptr->schedule;
4435 ix86_tune = (enum processor_type) ptr->tune;
4436 opts->x_ix86_branch_cost = ptr->branch_cost;
4437 ix86_tune_defaulted = ptr->tune_defaulted;
4438 ix86_arch_specified = ptr->arch_specified;
4439 opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
4440 opts->x_ix86_target_flags_explicit = ptr->x_ix86_target_flags_explicit;
4441 opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
4442 opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
4443 opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
4444 opts->x_ix86_cmodel = ptr->x_ix86_cmodel;
4445 opts->x_ix86_abi = ptr->x_ix86_abi;
4446 opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect;
4447 opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost;
4448 opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes;
4449 opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer;
4450 opts->x_ix86_force_drap = ptr->x_ix86_force_drap;
4451 opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg;
4452 opts->x_ix86_pmode = ptr->x_ix86_pmode;
4453 opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg;
4454 opts->x_ix86_recip_name = ptr->x_ix86_recip_name;
4455 opts->x_ix86_regparm = ptr->x_ix86_regparm;
4456 opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold;
4457 opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx;
4458 opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard;
4459 opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg;
4460 opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect;
4461 opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string;
4462 opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy;
4463 opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy;
4464 opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default;
4465 opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type;
4467 /* Recreate the arch feature tests if the arch changed */
4468 if (old_arch != ix86_arch)
4470 ix86_arch_mask = 1u << ix86_arch;
4471 for (i = 0; i < X86_ARCH_LAST; ++i)
4472 ix86_arch_features[i]
4473 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4476 /* Recreate the tune optimization tests */
4477 if (old_tune != ix86_tune)
4478 set_ix86_tune_features (ix86_tune, false);
4481 /* Print the current options */
4483 static void
4484 ix86_function_specific_print (FILE *file, int indent,
4485 struct cl_target_option *ptr)
4487 char *target_string
4488 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4489 NULL, NULL, ptr->x_ix86_fpmath, false);
4491 gcc_assert (ptr->arch < PROCESSOR_max);
4492 fprintf (file, "%*sarch = %d (%s)\n",
4493 indent, "",
4494 ptr->arch, processor_target_table[ptr->arch].name);
4496 gcc_assert (ptr->tune < PROCESSOR_max);
4497 fprintf (file, "%*stune = %d (%s)\n",
4498 indent, "",
4499 ptr->tune, processor_target_table[ptr->tune].name);
4501 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4503 if (target_string)
4505 fprintf (file, "%*s%s\n", indent, "", target_string);
4506 free (target_string);
4511 /* Inner function to process the attribute((target(...))), take an argument and
4512 set the current options from the argument. If we have a list, recursively go
4513 over the list. */
4515 static bool
4516 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
4517 struct gcc_options *opts,
4518 struct gcc_options *opts_set,
4519 struct gcc_options *enum_opts_set)
4521 char *next_optstr;
4522 bool ret = true;
4524 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4525 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4526 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4527 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4528 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4530 enum ix86_opt_type
4532 ix86_opt_unknown,
4533 ix86_opt_yes,
4534 ix86_opt_no,
4535 ix86_opt_str,
4536 ix86_opt_enum,
4537 ix86_opt_isa
4540 static const struct
4542 const char *string;
4543 size_t len;
4544 enum ix86_opt_type type;
4545 int opt;
4546 int mask;
4547 } attrs[] = {
4548 /* isa options */
4549 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4550 IX86_ATTR_ISA ("abm", OPT_mabm),
4551 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4552 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
4553 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
4554 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4555 IX86_ATTR_ISA ("aes", OPT_maes),
4556 IX86_ATTR_ISA ("sha", OPT_msha),
4557 IX86_ATTR_ISA ("avx", OPT_mavx),
4558 IX86_ATTR_ISA ("avx2", OPT_mavx2),
4559 IX86_ATTR_ISA ("avx512f", OPT_mavx512f),
4560 IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf),
4561 IX86_ATTR_ISA ("avx512er", OPT_mavx512er),
4562 IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd),
4563 IX86_ATTR_ISA ("avx512dq", OPT_mavx512dq),
4564 IX86_ATTR_ISA ("avx512bw", OPT_mavx512bw),
4565 IX86_ATTR_ISA ("avx512vl", OPT_mavx512vl),
4566 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4567 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4568 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4569 IX86_ATTR_ISA ("sse", OPT_msse),
4570 IX86_ATTR_ISA ("sse2", OPT_msse2),
4571 IX86_ATTR_ISA ("sse3", OPT_msse3),
4572 IX86_ATTR_ISA ("sse4", OPT_msse4),
4573 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4574 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4575 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4576 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4577 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4578 IX86_ATTR_ISA ("fma", OPT_mfma),
4579 IX86_ATTR_ISA ("xop", OPT_mxop),
4580 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4581 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4582 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4583 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4584 IX86_ATTR_ISA ("rtm", OPT_mrtm),
4585 IX86_ATTR_ISA ("hle", OPT_mhle),
4586 IX86_ATTR_ISA ("prfchw", OPT_mprfchw),
4587 IX86_ATTR_ISA ("rdseed", OPT_mrdseed),
4588 IX86_ATTR_ISA ("adx", OPT_madx),
4589 IX86_ATTR_ISA ("fxsr", OPT_mfxsr),
4590 IX86_ATTR_ISA ("xsave", OPT_mxsave),
4591 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt),
4592 IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1),
4593 IX86_ATTR_ISA ("clflushopt", OPT_mclflushopt),
4594 IX86_ATTR_ISA ("xsavec", OPT_mxsavec),
4595 IX86_ATTR_ISA ("xsaves", OPT_mxsaves),
4597 /* enum options */
4598 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
4600 /* string options */
4601 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4602 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4604 /* flag options */
4605 IX86_ATTR_YES ("cld",
4606 OPT_mcld,
4607 MASK_CLD),
4609 IX86_ATTR_NO ("fancy-math-387",
4610 OPT_mfancy_math_387,
4611 MASK_NO_FANCY_MATH_387),
4613 IX86_ATTR_YES ("ieee-fp",
4614 OPT_mieee_fp,
4615 MASK_IEEE_FP),
4617 IX86_ATTR_YES ("inline-all-stringops",
4618 OPT_minline_all_stringops,
4619 MASK_INLINE_ALL_STRINGOPS),
4621 IX86_ATTR_YES ("inline-stringops-dynamically",
4622 OPT_minline_stringops_dynamically,
4623 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4625 IX86_ATTR_NO ("align-stringops",
4626 OPT_mno_align_stringops,
4627 MASK_NO_ALIGN_STRINGOPS),
4629 IX86_ATTR_YES ("recip",
4630 OPT_mrecip,
4631 MASK_RECIP),
4635 /* If this is a list, recurse to get the options. */
4636 if (TREE_CODE (args) == TREE_LIST)
4638 bool ret = true;
4640 for (; args; args = TREE_CHAIN (args))
4641 if (TREE_VALUE (args)
4642 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4643 p_strings, opts, opts_set,
4644 enum_opts_set))
4645 ret = false;
4647 return ret;
4650 else if (TREE_CODE (args) != STRING_CST)
4652 error ("attribute %<target%> argument not a string");
4653 return false;
4656 /* Handle multiple arguments separated by commas. */
4657 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4659 while (next_optstr && *next_optstr != '\0')
4661 char *p = next_optstr;
4662 char *orig_p = p;
4663 char *comma = strchr (next_optstr, ',');
4664 const char *opt_string;
4665 size_t len, opt_len;
4666 int opt;
4667 bool opt_set_p;
4668 char ch;
4669 unsigned i;
4670 enum ix86_opt_type type = ix86_opt_unknown;
4671 int mask = 0;
4673 if (comma)
4675 *comma = '\0';
4676 len = comma - next_optstr;
4677 next_optstr = comma + 1;
4679 else
4681 len = strlen (p);
4682 next_optstr = NULL;
4685 /* Recognize no-xxx. */
4686 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4688 opt_set_p = false;
4689 p += 3;
4690 len -= 3;
4692 else
4693 opt_set_p = true;
4695 /* Find the option. */
4696 ch = *p;
4697 opt = N_OPTS;
4698 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4700 type = attrs[i].type;
4701 opt_len = attrs[i].len;
4702 if (ch == attrs[i].string[0]
4703 && ((type != ix86_opt_str && type != ix86_opt_enum)
4704 ? len == opt_len
4705 : len > opt_len)
4706 && memcmp (p, attrs[i].string, opt_len) == 0)
4708 opt = attrs[i].opt;
4709 mask = attrs[i].mask;
4710 opt_string = attrs[i].string;
4711 break;
4715 /* Process the option. */
4716 if (opt == N_OPTS)
4718 error ("attribute(target(\"%s\")) is unknown", orig_p);
4719 ret = false;
4722 else if (type == ix86_opt_isa)
4724 struct cl_decoded_option decoded;
4726 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4727 ix86_handle_option (opts, opts_set,
4728 &decoded, input_location);
4731 else if (type == ix86_opt_yes || type == ix86_opt_no)
4733 if (type == ix86_opt_no)
4734 opt_set_p = !opt_set_p;
4736 if (opt_set_p)
4737 opts->x_target_flags |= mask;
4738 else
4739 opts->x_target_flags &= ~mask;
4742 else if (type == ix86_opt_str)
4744 if (p_strings[opt])
4746 error ("option(\"%s\") was already specified", opt_string);
4747 ret = false;
4749 else
4750 p_strings[opt] = xstrdup (p + opt_len);
4753 else if (type == ix86_opt_enum)
4755 bool arg_ok;
4756 int value;
4758 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
4759 if (arg_ok)
4760 set_option (opts, enum_opts_set, opt, value,
4761 p + opt_len, DK_UNSPECIFIED, input_location,
4762 global_dc);
4763 else
4765 error ("attribute(target(\"%s\")) is unknown", orig_p);
4766 ret = false;
4770 else
4771 gcc_unreachable ();
4774 return ret;
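/* An illustrative, hypothetical example of the syntax parsed above: the
   attribute string is a comma-separated list, with "no-" negating an ISA
   flag.

     __attribute__((target("no-sse4.1,arch=core2,fpmath=sse")))
     int hypothetical_fn (int x);

   "no-sse4.1" is handled as an ISA option with opt_set_p false,
   "arch=core2" as a string option and "fpmath=sse" as an enum option.  */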
4777 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4779 tree
4780 ix86_valid_target_attribute_tree (tree args,
4781 struct gcc_options *opts,
4782 struct gcc_options *opts_set)
4784 const char *orig_arch_string = opts->x_ix86_arch_string;
4785 const char *orig_tune_string = opts->x_ix86_tune_string;
4786 enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
4787 int orig_tune_defaulted = ix86_tune_defaulted;
4788 int orig_arch_specified = ix86_arch_specified;
4789 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
4790 tree t = NULL_TREE;
4791 int i;
4792 struct cl_target_option *def
4793 = TREE_TARGET_OPTION (target_option_default_node);
4794 struct gcc_options enum_opts_set;
4796 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
4798 /* Process each of the options on the chain. */
4799 if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts,
4800 opts_set, &enum_opts_set))
4801 return error_mark_node;
4803 /* If the changed options are different from the default, rerun
4804 ix86_option_override_internal, and then save the options away.
4805 The string options are attribute options, and will be undone
4806 when we copy the save structure. */
4807 if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags
4808 || opts->x_target_flags != def->x_target_flags
4809 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4810 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4811 || enum_opts_set.x_ix86_fpmath)
4813 /* If we are using the default tune= or arch=, undo the string assigned,
4814 and use the default. */
4815 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4816 opts->x_ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4817 else if (!orig_arch_specified)
4818 opts->x_ix86_arch_string = NULL;
4820 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4821 opts->x_ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4822 else if (orig_tune_defaulted)
4823 opts->x_ix86_tune_string = NULL;
4825 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4826 if (enum_opts_set.x_ix86_fpmath)
4827 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4828 else if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
4829 && TARGET_SSE_P (opts->x_ix86_isa_flags))
4831 opts->x_ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
4832 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4835 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4836 ix86_option_override_internal (false, opts, opts_set);
4838 /* Add any builtin functions with the new isa if any. */
4839 ix86_add_new_builtins (opts->x_ix86_isa_flags);
4841 /* Save the current options unless we are validating options for
4842 #pragma. */
4843 t = build_target_option_node (opts);
4845 opts->x_ix86_arch_string = orig_arch_string;
4846 opts->x_ix86_tune_string = orig_tune_string;
4847 opts_set->x_ix86_fpmath = orig_fpmath_set;
4849 /* Free up memory allocated to hold the strings */
4850 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4851 free (option_strings[i]);
4854 return t;
4857 /* Hook to validate attribute((target("string"))). */
4859 static bool
4860 ix86_valid_target_attribute_p (tree fndecl,
4861 tree ARG_UNUSED (name),
4862 tree args,
4863 int ARG_UNUSED (flags))
4865 struct gcc_options func_options;
4866 tree new_target, new_optimize;
4867 bool ret = true;
4869 /* attribute((target("default"))) does nothing, beyond
4870 affecting multi-versioning. */
4871 if (TREE_VALUE (args)
4872 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
4873 && TREE_CHAIN (args) == NULL_TREE
4874 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
4875 return true;
4877 tree old_optimize = build_optimization_node (&global_options);
4879 /* Get the optimization options of the current function. */
4880 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4882 if (!func_optimize)
4883 func_optimize = old_optimize;
4885 /* Init func_options. */
4886 memset (&func_options, 0, sizeof (func_options));
4887 init_options_struct (&func_options, NULL);
4888 lang_hooks.init_options_struct (&func_options);
4890 cl_optimization_restore (&func_options,
4891 TREE_OPTIMIZATION (func_optimize));
4893 /* Initialize func_options to the default before its target options can
4894 be set. */
4895 cl_target_option_restore (&func_options,
4896 TREE_TARGET_OPTION (target_option_default_node));
4898 new_target = ix86_valid_target_attribute_tree (args, &func_options,
4899 &global_options_set);
4901 new_optimize = build_optimization_node (&func_options);
4903 if (new_target == error_mark_node)
4904 ret = false;
4906 else if (fndecl && new_target)
4908 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4910 if (old_optimize != new_optimize)
4911 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4914 return ret;
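/* An illustrative, hypothetical use of the attribute validated above:

     __attribute__((target("avx2")))
     int hypothetical_sum (const int *p, int n);

   The hook records per-function target and optimization nodes, so the body
   of hypothetical_sum can be compiled with AVX2 enabled even when the rest
   of the translation unit is not.  */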
4918 /* Hook to determine if one function can safely inline another. */
4920 static bool
4921 ix86_can_inline_p (tree caller, tree callee)
4923 bool ret = false;
4924 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4925 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4927 /* If callee has no option attributes, then it is ok to inline. */
4928 if (!callee_tree)
4929 ret = true;
4931 /* If caller has no option attributes, but callee does then it is not ok to
4932 inline. */
4933 else if (!caller_tree)
4934 ret = false;
4936 else
4938 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4939 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
4941 /* Callee's isa options should be a subset of the caller's, i.e. an SSE4
4942 function can inline an SSE2 function but an SSE2 function can't inline
4943 an SSE4 function. */
4944 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
4945 != callee_opts->x_ix86_isa_flags)
4946 ret = false;
4948 /* See if we have the same non-isa options. */
4949 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
4950 ret = false;
4952 /* See if arch, tune, etc. are the same. */
4953 else if (caller_opts->arch != callee_opts->arch)
4954 ret = false;
4956 else if (caller_opts->tune != callee_opts->tune)
4957 ret = false;
4959 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
4960 ret = false;
4962 else if (caller_opts->branch_cost != callee_opts->branch_cost)
4963 ret = false;
4965 else
4966 ret = true;
4969 return ret;
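/* An illustrative, hypothetical example of the subset rule above: a caller
   built for AVX2 may inline a callee restricted to SSE2, since the callee's
   ISA flags are a subset of the caller's, while inlining an AVX2 callee
   into an SSE2 caller is rejected.

     __attribute__((target("sse2"))) static int callee_sse2 (int x);
     __attribute__((target("avx2"))) int caller_avx2 (int x);  */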
4973 /* Remember the last target of ix86_set_current_function. */
4974 static GTY(()) tree ix86_previous_fndecl;
4976 /* Invalidate ix86_previous_fndecl cache. */
4977 void
4978 ix86_reset_previous_fndecl (void)
4980 ix86_previous_fndecl = NULL_TREE;
4983 /* Establish appropriate back-end context for processing the function
4984 FNDECL. The argument might be NULL to indicate processing at top
4985 level, outside of any function scope. */
4986 static void
4987 ix86_set_current_function (tree fndecl)
4989 /* Only change the context if the function changes. This hook is called
4990 several times in the course of compiling a function, and we don't want to
4991 slow things down too much or call target_reinit when it isn't safe. */
4992 if (fndecl && fndecl != ix86_previous_fndecl)
4994 tree old_tree = (ix86_previous_fndecl
4995 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
4996 : NULL_TREE);
4998 tree new_tree = (fndecl
4999 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
5000 : NULL_TREE);
5002 ix86_previous_fndecl = fndecl;
5003 if (old_tree == new_tree)
5006 else if (new_tree)
5008 cl_target_option_restore (&global_options,
5009 TREE_TARGET_OPTION (new_tree));
5010 if (TREE_TARGET_GLOBALS (new_tree))
5011 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5012 else
5013 TREE_TARGET_GLOBALS (new_tree)
5014 = save_target_globals_default_opts ();
5017 else if (old_tree)
5019 new_tree = target_option_current_node;
5020 cl_target_option_restore (&global_options,
5021 TREE_TARGET_OPTION (new_tree));
5022 if (TREE_TARGET_GLOBALS (new_tree))
5023 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5024 else if (new_tree == target_option_default_node)
5025 restore_target_globals (&default_target_globals);
5026 else
5027 TREE_TARGET_GLOBALS (new_tree)
5028 = save_target_globals_default_opts ();
5034 /* Return true if this goes in large data/bss. */
5036 static bool
5037 ix86_in_large_data_p (tree exp)
5039 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
5040 return false;
5042 /* Functions are never large data. */
5043 if (TREE_CODE (exp) == FUNCTION_DECL)
5044 return false;
5046 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
5048 const char *section = DECL_SECTION_NAME (exp);
5049 if (strcmp (section, ".ldata") == 0
5050 || strcmp (section, ".lbss") == 0)
5051 return true;
5052 return false;
5054 else
5056 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
5058 /* If this is an incomplete type with size 0, then we can't put it
5059 in data because it might be too big when completed. Also,
5060 int_size_in_bytes returns -1 if size can vary or is larger than
5061 an integer, in which case it is also safer to assume that it goes in
5062 large data. */
5063 if (size <= 0 || size > ix86_section_threshold)
5064 return true;
5067 return false;
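/* Illustrative, hypothetical example: under -mcmodel=medium the threshold
   checked above is -mlarge-data-threshold (ix86_section_threshold, 65536
   bytes by default), so an object such as

     static char big_table[1 << 20];

   counts as large data, while small objects stay in the ordinary data/bss
   sections.  */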
5070 /* Switch to the appropriate section for output of DECL.
5071 DECL is either a `VAR_DECL' node or a constant of some sort.
5072 RELOC indicates whether forming the initial value of DECL requires
5073 link-time relocations. */
5075 ATTRIBUTE_UNUSED static section *
5076 x86_64_elf_select_section (tree decl, int reloc,
5077 unsigned HOST_WIDE_INT align)
5079 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5080 && ix86_in_large_data_p (decl))
5082 const char *sname = NULL;
5083 unsigned int flags = SECTION_WRITE;
5084 switch (categorize_decl_for_section (decl, reloc))
5086 case SECCAT_DATA:
5087 sname = ".ldata";
5088 break;
5089 case SECCAT_DATA_REL:
5090 sname = ".ldata.rel";
5091 break;
5092 case SECCAT_DATA_REL_LOCAL:
5093 sname = ".ldata.rel.local";
5094 break;
5095 case SECCAT_DATA_REL_RO:
5096 sname = ".ldata.rel.ro";
5097 break;
5098 case SECCAT_DATA_REL_RO_LOCAL:
5099 sname = ".ldata.rel.ro.local";
5100 break;
5101 case SECCAT_BSS:
5102 sname = ".lbss";
5103 flags |= SECTION_BSS;
5104 break;
5105 case SECCAT_RODATA:
5106 case SECCAT_RODATA_MERGE_STR:
5107 case SECCAT_RODATA_MERGE_STR_INIT:
5108 case SECCAT_RODATA_MERGE_CONST:
5109 sname = ".lrodata";
5110 flags = 0;
5111 break;
5112 case SECCAT_SRODATA:
5113 case SECCAT_SDATA:
5114 case SECCAT_SBSS:
5115 gcc_unreachable ();
5116 case SECCAT_TEXT:
5117 case SECCAT_TDATA:
5118 case SECCAT_TBSS:
5119 /* We don't split these for medium model. Place them into
5120 default sections and hope for the best. */
5121 break;
5123 if (sname)
5125 /* We might get called with string constants, but get_named_section
5126 doesn't like them as they are not DECLs. Also, we need to set
5127 flags in that case. */
5128 if (!DECL_P (decl))
5129 return get_section (sname, flags, NULL);
5130 return get_named_section (decl, sname, reloc);
5133 return default_elf_select_section (decl, reloc, align);
5136 /* Select a set of attributes for section NAME based on the properties
5137 of DECL and whether or not RELOC indicates that DECL's initializer
5138 might contain runtime relocations. */
5140 static unsigned int ATTRIBUTE_UNUSED
5141 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
5143 unsigned int flags = default_section_type_flags (decl, name, reloc);
5145 if (decl == NULL_TREE
5146 && (strcmp (name, ".ldata.rel.ro") == 0
5147 || strcmp (name, ".ldata.rel.ro.local") == 0))
5148 flags |= SECTION_RELRO;
5150 if (strcmp (name, ".lbss") == 0
5151 || strncmp (name, ".lbss.", 5) == 0
5152 || strncmp (name, ".gnu.linkonce.lb.", 16) == 0)
5153 flags |= SECTION_BSS;
5155 return flags;
5158 /* Build up a unique section name, expressed as a
5159 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
5160 RELOC indicates whether the initial value of EXP requires
5161 link-time relocations. */
5163 static void ATTRIBUTE_UNUSED
5164 x86_64_elf_unique_section (tree decl, int reloc)
5166 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5167 && ix86_in_large_data_p (decl))
5169 const char *prefix = NULL;
5170 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
5171 bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
5173 switch (categorize_decl_for_section (decl, reloc))
5175 case SECCAT_DATA:
5176 case SECCAT_DATA_REL:
5177 case SECCAT_DATA_REL_LOCAL:
5178 case SECCAT_DATA_REL_RO:
5179 case SECCAT_DATA_REL_RO_LOCAL:
5180 prefix = one_only ? ".ld" : ".ldata";
5181 break;
5182 case SECCAT_BSS:
5183 prefix = one_only ? ".lb" : ".lbss";
5184 break;
5185 case SECCAT_RODATA:
5186 case SECCAT_RODATA_MERGE_STR:
5187 case SECCAT_RODATA_MERGE_STR_INIT:
5188 case SECCAT_RODATA_MERGE_CONST:
5189 prefix = one_only ? ".lr" : ".lrodata";
5190 break;
5191 case SECCAT_SRODATA:
5192 case SECCAT_SDATA:
5193 case SECCAT_SBSS:
5194 gcc_unreachable ();
5195 case SECCAT_TEXT:
5196 case SECCAT_TDATA:
5197 case SECCAT_TBSS:
5198 /* We don't split these for medium model. Place them into
5199 default sections and hope for the best. */
5200 break;
5202 if (prefix)
5204 const char *name, *linkonce;
5205 char *string;
5207 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
5208 name = targetm.strip_name_encoding (name);
5210 /* If we're using one_only, then there needs to be a .gnu.linkonce
5211 prefix to the section name. */
5212 linkonce = one_only ? ".gnu.linkonce" : "";
5214 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
5216 set_decl_section_name (decl, string);
5217 return;
5220 default_unique_section (decl, reloc);
5223 #ifdef COMMON_ASM_OP
5224 /* This says how to output assembler code to declare an
5225 uninitialized external linkage data object.
5227 For medium model x86-64 we need to use .largecomm opcode for
5228 large objects. */
5229 void
5230 x86_elf_aligned_common (FILE *file,
5231 const char *name, unsigned HOST_WIDE_INT size,
5232 int align)
5234 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5235 && size > (unsigned int)ix86_section_threshold)
5236 fputs (".largecomm\t", file);
5237 else
5238 fputs (COMMON_ASM_OP, file);
5239 assemble_name (file, name);
5240 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
5241 size, align / BITS_PER_UNIT);
5243 #endif
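/* Illustrative, hypothetical example: for a 1 MiB common symbol "big_table"
   with 32-byte alignment under -mcmodel=medium, the routine above emits
   roughly

     .largecomm	big_table,1048576,32

   while a small symbol falls back to the ordinary COMMON_ASM_OP (.comm)
   directive.  */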
5245 /* Utility function for targets to use in implementing
5246 ASM_OUTPUT_ALIGNED_BSS. */
5248 void
5249 x86_output_aligned_bss (FILE *file, tree decl, const char *name,
5250 unsigned HOST_WIDE_INT size, int align)
5252 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5253 && size > (unsigned int)ix86_section_threshold)
5254 switch_to_section (get_named_section (decl, ".lbss", 0));
5255 else
5256 switch_to_section (bss_section);
5257 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
5258 #ifdef ASM_DECLARE_OBJECT_NAME
5259 last_assemble_variable_decl = decl;
5260 ASM_DECLARE_OBJECT_NAME (file, name, decl);
5261 #else
5262 /* Standard thing is just output label for the object. */
5263 ASM_OUTPUT_LABEL (file, name);
5264 #endif /* ASM_DECLARE_OBJECT_NAME */
5265 ASM_OUTPUT_SKIP (file, size ? size : 1);
5268 /* Decide whether we must probe the stack before any space allocation
5269 on this target. It's essentially TARGET_STACK_PROBE except when
5270 -fstack-check causes the stack to be already probed differently. */
5272 bool
5273 ix86_target_stack_probe (void)
5275 /* Do not probe the stack twice if static stack checking is enabled. */
5276 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5277 return false;
5279 return TARGET_STACK_PROBE;
5282 /* Decide whether we can make a sibling call to a function. DECL is the
5283 declaration of the function being targeted by the call and EXP is the
5284 CALL_EXPR representing the call. */
5286 static bool
5287 ix86_function_ok_for_sibcall (tree decl, tree exp)
5289 tree type, decl_or_type;
5290 rtx a, b;
5292 /* If we are generating position-independent code, we cannot sibcall
5293 optimize any indirect call, or a direct call to a global function,
5294 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
5295 if (!TARGET_MACHO
5296 && !TARGET_64BIT
5297 && flag_pic
5298 && (!decl || !targetm.binds_local_p (decl)))
5299 return false;
5301 /* If we need to align the outgoing stack, then sibcalling would
5302 unalign the stack, which may break the called function. */
5303 if (ix86_minimum_incoming_stack_boundary (true)
5304 < PREFERRED_STACK_BOUNDARY)
5305 return false;
5307 if (decl)
5309 decl_or_type = decl;
5310 type = TREE_TYPE (decl);
5312 else
5314 /* We're looking at the CALL_EXPR, we need the type of the function. */
5315 type = CALL_EXPR_FN (exp); /* pointer expression */
5316 type = TREE_TYPE (type); /* pointer type */
5317 type = TREE_TYPE (type); /* function type */
5318 decl_or_type = type;
5321 /* Check that the return value locations are the same. Like
5322 if we are returning floats on the 80387 register stack, we cannot
5323 make a sibcall from a function that doesn't return a float to a
5324 function that does or, conversely, from a function that does return
5325 a float to a function that doesn't; the necessary stack adjustment
5326 would not be executed. This is also the place we notice
5327 differences in the return value ABI. Note that it is ok for one
5328 of the functions to have void return type as long as the return
5329 value of the other is passed in a register. */
5330 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
5331 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5332 cfun->decl, false);
5333 if (STACK_REG_P (a) || STACK_REG_P (b))
5335 if (!rtx_equal_p (a, b))
5336 return false;
5338 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5340 else if (!rtx_equal_p (a, b))
5341 return false;
5343 if (TARGET_64BIT)
5345 /* The SYSV ABI has more call-clobbered registers;
5346 disallow sibcalls from MS to SYSV. */
5347 if (cfun->machine->call_abi == MS_ABI
5348 && ix86_function_type_abi (type) == SYSV_ABI)
5349 return false;
5351 else
5353 /* If this call is indirect, we'll need to be able to use a
5354 call-clobbered register for the address of the target function.
5355 Make sure that all such registers are not used for passing
5356 parameters. Note that DLLIMPORT functions are indirect. */
5357 if (!decl
5358 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
5360 if (ix86_function_regparm (type, NULL) >= 3)
5362 /* ??? Need to count the actual number of registers to be used,
5363 not the possible number of registers. Fix later. */
5364 return false;
5369 /* Otherwise okay. That also includes certain types of indirect calls. */
5370 return true;
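/* Illustrative, hypothetical example of the PIC restriction above: with
   -m32 -fpic, a tail call to a global function is not sibcall-optimized
   because the call goes through the PLT, which needs %ebx live.

     extern int external_fn (int);
     int wrapper (int x) { return external_fn (x); }

   A callee with local binding (e.g. a static function) can still be
   sibcalled.  */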
5373 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5374 and "sseregparm" calling convention attributes;
5375 arguments as in struct attribute_spec.handler. */
5377 static tree
5378 ix86_handle_cconv_attribute (tree *node, tree name,
5379 tree args,
5380 int,
5381 bool *no_add_attrs)
5383 if (TREE_CODE (*node) != FUNCTION_TYPE
5384 && TREE_CODE (*node) != METHOD_TYPE
5385 && TREE_CODE (*node) != FIELD_DECL
5386 && TREE_CODE (*node) != TYPE_DECL)
5388 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5389 name);
5390 *no_add_attrs = true;
5391 return NULL_TREE;
5394 /* Can combine regparm with all attributes but fastcall, and thiscall. */
5395 if (is_attribute_p ("regparm", name))
5397 tree cst;
5399 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5401 error ("fastcall and regparm attributes are not compatible");
5404 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5406 error ("regparam and thiscall attributes are not compatible");
5409 cst = TREE_VALUE (args);
5410 if (TREE_CODE (cst) != INTEGER_CST)
5412 warning (OPT_Wattributes,
5413 "%qE attribute requires an integer constant argument",
5414 name);
5415 *no_add_attrs = true;
5417 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
5419 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
5420 name, REGPARM_MAX);
5421 *no_add_attrs = true;
5424 return NULL_TREE;
5427 if (TARGET_64BIT)
5429 /* Do not warn when emulating the MS ABI. */
5430 if ((TREE_CODE (*node) != FUNCTION_TYPE
5431 && TREE_CODE (*node) != METHOD_TYPE)
5432 || ix86_function_type_abi (*node) != MS_ABI)
5433 warning (OPT_Wattributes, "%qE attribute ignored",
5434 name);
5435 *no_add_attrs = true;
5436 return NULL_TREE;
5439 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5440 if (is_attribute_p ("fastcall", name))
5442 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5444 error ("fastcall and cdecl attributes are not compatible");
5446 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5448 error ("fastcall and stdcall attributes are not compatible");
5450 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
5452 error ("fastcall and regparm attributes are not compatible");
5454 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5456 error ("fastcall and thiscall attributes are not compatible");
5460 /* Can combine stdcall with fastcall (redundant), regparm and
5461 sseregparm. */
5462 else if (is_attribute_p ("stdcall", name))
5464 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5466 error ("stdcall and cdecl attributes are not compatible");
5468 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5470 error ("stdcall and fastcall attributes are not compatible");
5472 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5474 error ("stdcall and thiscall attributes are not compatible");
5478 /* Can combine cdecl with regparm and sseregparm. */
5479 else if (is_attribute_p ("cdecl", name))
5481 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5483 error ("stdcall and cdecl attributes are not compatible");
5485 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5487 error ("fastcall and cdecl attributes are not compatible");
5489 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5491 error ("cdecl and thiscall attributes are not compatible");
5494 else if (is_attribute_p ("thiscall", name))
5496 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
5497 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
5498 name);
5499 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5501 error ("stdcall and thiscall attributes are not compatible");
5503 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5505 error ("fastcall and thiscall attributes are not compatible");
5507 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5509 error ("cdecl and thiscall attributes are not compatible");
5513 /* Can combine sseregparm with all attributes. */
5515 return NULL_TREE;
5518 /* The transactional memory builtins are implicitly regparm or fastcall
5519 depending on the ABI. Override the generic do-nothing attribute that
5520 these builtins were declared with, and replace it with one of the two
5521 attributes that we expect elsewhere. */
5523 static tree
5524 ix86_handle_tm_regparm_attribute (tree *node, tree, tree,
5525 int flags, bool *no_add_attrs)
5527 tree alt;
5529 /* In no case do we want to add the placeholder attribute. */
5530 *no_add_attrs = true;
5532 /* The 64-bit ABI is unchanged for transactional memory. */
5533 if (TARGET_64BIT)
5534 return NULL_TREE;
5536 /* ??? Is there a better way to validate 32-bit windows? We have
5537 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
5538 if (CHECK_STACK_LIMIT > 0)
5539 alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
5540 else
5542 alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
5543 alt = tree_cons (get_identifier ("regparm"), alt, NULL);
5545 decl_attributes (node, alt, flags);
5547 return NULL_TREE;
5550 /* This function determines from TYPE the calling-convention. */
5552 unsigned int
5553 ix86_get_callcvt (const_tree type)
5555 unsigned int ret = 0;
5556 bool is_stdarg;
5557 tree attrs;
5559 if (TARGET_64BIT)
5560 return IX86_CALLCVT_CDECL;
5562 attrs = TYPE_ATTRIBUTES (type);
5563 if (attrs != NULL_TREE)
5565 if (lookup_attribute ("cdecl", attrs))
5566 ret |= IX86_CALLCVT_CDECL;
5567 else if (lookup_attribute ("stdcall", attrs))
5568 ret |= IX86_CALLCVT_STDCALL;
5569 else if (lookup_attribute ("fastcall", attrs))
5570 ret |= IX86_CALLCVT_FASTCALL;
5571 else if (lookup_attribute ("thiscall", attrs))
5572 ret |= IX86_CALLCVT_THISCALL;
5574 /* Regparm isn't allowed for thiscall and fastcall. */
5575 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
5577 if (lookup_attribute ("regparm", attrs))
5578 ret |= IX86_CALLCVT_REGPARM;
5579 if (lookup_attribute ("sseregparm", attrs))
5580 ret |= IX86_CALLCVT_SSEREGPARM;
5583 if (IX86_BASE_CALLCVT(ret) != 0)
5584 return ret;
5587 is_stdarg = stdarg_p (type);
5588 if (TARGET_RTD && !is_stdarg)
5589 return IX86_CALLCVT_STDCALL | ret;
5591 if (ret != 0
5592 || is_stdarg
5593 || TREE_CODE (type) != METHOD_TYPE
5594 || ix86_function_type_abi (type) != MS_ABI)
5595 return IX86_CALLCVT_CDECL | ret;
5597 return IX86_CALLCVT_THISCALL;
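/* Illustrative, hypothetical example: for 32-bit code a prototype such as

     int __attribute__((stdcall)) hypothetical_fn (int a, int b);

   yields IX86_CALLCVT_STDCALL here; an unadorned prototype yields
   IX86_CALLCVT_CDECL (or IX86_CALLCVT_STDCALL under -mrtd for non-variadic
   functions), and 64-bit code always gets IX86_CALLCVT_CDECL.  */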
5600 /* Return 0 if the attributes for two types are incompatible, 1 if they
5601 are compatible, and 2 if they are nearly compatible (which causes a
5602 warning to be generated). */
5604 static int
5605 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5607 unsigned int ccvt1, ccvt2;
5609 if (TREE_CODE (type1) != FUNCTION_TYPE
5610 && TREE_CODE (type1) != METHOD_TYPE)
5611 return 1;
5613 ccvt1 = ix86_get_callcvt (type1);
5614 ccvt2 = ix86_get_callcvt (type2);
5615 if (ccvt1 != ccvt2)
5616 return 0;
5617 if (ix86_function_regparm (type1, NULL)
5618 != ix86_function_regparm (type2, NULL))
5619 return 0;
5621 return 1;
5624 /* Return the regparm value for a function with the indicated TYPE and DECL.
5625 DECL may be NULL when calling function indirectly
5626 or considering a libcall. */
5628 static int
5629 ix86_function_regparm (const_tree type, const_tree decl)
5631 tree attr;
5632 int regparm;
5633 unsigned int ccvt;
5635 if (TARGET_64BIT)
5636 return (ix86_function_type_abi (type) == SYSV_ABI
5637 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5638 ccvt = ix86_get_callcvt (type);
5639 regparm = ix86_regparm;
5641 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5643 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5644 if (attr)
5646 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5647 return regparm;
5650 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5651 return 2;
5652 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5653 return 1;
5655 /* Use register calling convention for local functions when possible. */
5656 if (decl
5657 && TREE_CODE (decl) == FUNCTION_DECL
5658 /* Caller and callee must agree on the calling convention, so
5659 checking just the global `optimize' here would mean that with
5660 __attribute__((optimize (...))) the caller could use the regparm
5661 convention and the callee not, or vice versa. Instead look at
5662 whether the callee itself is optimized or not. */
5663 && opt_for_fn (decl, optimize)
5664 && !(profile_flag && !flag_fentry))
5666 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5667 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE (decl));
5668 if (i && i->local && i->can_change_signature)
5670 int local_regparm, globals = 0, regno;
5672 /* Make sure no regparm register is taken by a
5673 fixed register variable. */
5674 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
5675 if (fixed_regs[local_regparm])
5676 break;
5678 /* We don't want to use regparm(3) for nested functions as
5679 these use a static chain pointer in the third argument. */
5680 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
5681 local_regparm = 2;
5683 /* In 32-bit mode save a register for the split stack. */
5684 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
5685 local_regparm = 2;
5687 /* Each fixed register usage increases register pressure,
5688 so fewer registers should be used for argument passing.
5689 This functionality can be overridden by an explicit
5690 regparm value. */
5691 for (regno = AX_REG; regno <= DI_REG; regno++)
5692 if (fixed_regs[regno])
5693 globals++;
5695 local_regparm
5696 = globals < local_regparm ? local_regparm - globals : 0;
5698 if (local_regparm > regparm)
5699 regparm = local_regparm;
5703 return regparm;
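/* Illustrative, hypothetical example: for the 32-bit declaration

     int __attribute__((regparm(3))) hypothetical_fn (int a, int b, int c);

   the routine above returns 3, so the first three integer arguments are
   passed in EAX, EDX and ECX; fastcall yields 2 and thiscall yields 1.  */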
5706 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5707 DFmode (2) arguments in SSE registers for a function with the
5708 indicated TYPE and DECL. DECL may be NULL when calling function
5709 indirectly or considering a libcall. Otherwise return 0. */
5711 static int
5712 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5714 gcc_assert (!TARGET_64BIT);
5716 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5717 by the sseregparm attribute. */
5718 if (TARGET_SSEREGPARM
5719 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5721 if (!TARGET_SSE)
5723 if (warn)
5725 if (decl)
5726 error ("calling %qD with attribute sseregparm without "
5727 "SSE/SSE2 enabled", decl);
5728 else
5729 error ("calling %qT with attribute sseregparm without "
5730 "SSE/SSE2 enabled", type);
5732 return 0;
5735 return 2;
5738 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5739 (and DFmode for SSE2) arguments in SSE registers. */
5740 if (decl && TARGET_SSE_MATH && optimize
5741 && !(profile_flag && !flag_fentry))
5743 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5744 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5745 if (i && i->local && i->can_change_signature)
5746 return TARGET_SSE2 ? 2 : 1;
5749 return 0;
5752 /* Return true if EAX is live at the start of the function. Used by
5753 ix86_expand_prologue to determine if we need special help before
5754 calling allocate_stack_worker. */
5756 static bool
5757 ix86_eax_live_at_start_p (void)
5759 /* Cheat. Don't bother working forward from ix86_function_regparm
5760 to the function type to whether an actual argument is located in
5761 eax. Instead just look at cfg info, which is still close enough
5762 to correct at this point. This gives false positives for broken
5763 functions that might use uninitialized data that happens to be
5764 allocated in eax, but who cares? */
5765 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
5768 static bool
5769 ix86_keep_aggregate_return_pointer (tree fntype)
5771 tree attr;
5773 if (!TARGET_64BIT)
5775 attr = lookup_attribute ("callee_pop_aggregate_return",
5776 TYPE_ATTRIBUTES (fntype));
5777 if (attr)
5778 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
5780 /* For 32-bit MS-ABI the default is to keep aggregate
5781 return pointer. */
5782 if (ix86_function_type_abi (fntype) == MS_ABI)
5783 return true;
5785 return KEEP_AGGREGATE_RETURN_POINTER != 0;
5788 /* Value is the number of bytes of arguments automatically
5789 popped when returning from a subroutine call.
5790 FUNDECL is the declaration node of the function (as a tree),
5791 FUNTYPE is the data type of the function (as a tree),
5792 or for a library call it is an identifier node for the subroutine name.
5793 SIZE is the number of bytes of arguments passed on the stack.
5795 On the 80386, the RTD insn may be used to pop them if the number
5796 of args is fixed, but if the number is variable then the caller
5797 must pop them all. RTD can't be used for library calls now
5798 because the library is compiled with the Unix compiler.
5799 Use of RTD is a selectable option, since it is incompatible with
5800 standard Unix calling sequences. If the option is not selected,
5801 the caller must always pop the args.
5803 The attribute stdcall is equivalent to RTD on a per module basis. */
5805 static int
5806 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5808 unsigned int ccvt;
5810 /* None of the 64-bit ABIs pop arguments. */
5811 if (TARGET_64BIT)
5812 return 0;
5814 ccvt = ix86_get_callcvt (funtype);
5816 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
5817 | IX86_CALLCVT_THISCALL)) != 0
5818 && ! stdarg_p (funtype))
5819 return size;
5821 /* Lose any fake structure return argument if it is passed on the stack. */
5822 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5823 && !ix86_keep_aggregate_return_pointer (funtype))
5825 int nregs = ix86_function_regparm (funtype, fundecl);
5826 if (nregs == 0)
5827 return GET_MODE_SIZE (Pmode);
5830 return 0;
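/* Illustrative, hypothetical example: for the 32-bit stdcall function

     void __attribute__((stdcall)) hypothetical_fn (int a, int b, int c);

   SIZE is 12, so the routine above returns 12 and the callee pops its own
   arguments; a plain cdecl or variadic function normally returns 0 and the
   caller pops, aside from the hidden aggregate-return pointer case handled
   just above.  */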
5833 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
5835 static bool
5836 ix86_legitimate_combined_insn (rtx_insn *insn)
5838 /* Check operand constraints in case hard registers were propagated
5839 into insn pattern. This check prevents combine pass from
5840 generating insn patterns with invalid hard register operands.
5841 These invalid insns can eventually confuse reload to error out
5842 with a spill failure. See also PRs 46829 and 46843. */
5843 if ((INSN_CODE (insn) = recog (PATTERN (insn), insn, 0)) >= 0)
5845 int i;
5847 extract_insn (insn);
5848 preprocess_constraints (insn);
5850 int n_operands = recog_data.n_operands;
5851 int n_alternatives = recog_data.n_alternatives;
5852 for (i = 0; i < n_operands; i++)
5854 rtx op = recog_data.operand[i];
5855 enum machine_mode mode = GET_MODE (op);
5856 const operand_alternative *op_alt;
5857 int offset = 0;
5858 bool win;
5859 int j;
5861 /* For pre-AVX disallow unaligned loads/stores where the
5862 instructions don't support it. */
5863 if (!TARGET_AVX
5864 && VECTOR_MODE_P (GET_MODE (op))
5865 && misaligned_operand (op, GET_MODE (op)))
5867 int min_align = get_attr_ssememalign (insn);
5868 if (min_align == 0)
5869 return false;
5872 /* A unary operator may be accepted by the predicate, but it
5873 is irrelevant for matching constraints. */
5874 if (UNARY_P (op))
5875 op = XEXP (op, 0);
5877 if (GET_CODE (op) == SUBREG)
5879 if (REG_P (SUBREG_REG (op))
5880 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
5881 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
5882 GET_MODE (SUBREG_REG (op)),
5883 SUBREG_BYTE (op),
5884 GET_MODE (op));
5885 op = SUBREG_REG (op);
5888 if (!(REG_P (op) && HARD_REGISTER_P (op)))
5889 continue;
5891 op_alt = recog_op_alt;
5893 /* Operand has no constraints, anything is OK. */
5894 win = !n_alternatives;
5896 alternative_mask enabled = recog_data.enabled_alternatives;
5897 for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
5899 if (!TEST_BIT (enabled, j))
5900 continue;
5901 if (op_alt[i].anything_ok
5902 || (op_alt[i].matches != -1
5903 && operands_match_p
5904 (recog_data.operand[i],
5905 recog_data.operand[op_alt[i].matches]))
5906 || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
5908 win = true;
5909 break;
5913 if (!win)
5914 return false;
5918 return true;
5921 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
5923 static unsigned HOST_WIDE_INT
5924 ix86_asan_shadow_offset (void)
5926 return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
5927 : HOST_WIDE_INT_C (0x7fff8000))
5928 : (HOST_WIDE_INT_1 << 29);
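/* Illustrative note: AddressSanitizer maps an application address to shadow
   memory as (addr >> 3) + offset, so with the LP64 Linux value above a
   hypothetical address is translated as

     shadow = (0x7fffffffe000 >> 3) + 0x7fff8000;

   the 1 << 29 constant plays the same role for 32-bit targets.  */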
5931 /* Argument support functions. */
5933 /* Return true when register may be used to pass function parameters. */
5934 bool
5935 ix86_function_arg_regno_p (int regno)
5937 int i;
5938 const int *parm_regs;
5940 if (!TARGET_64BIT)
5942 if (TARGET_MACHO)
5943 return (regno < REGPARM_MAX
5944 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5945 else
5946 return (regno < REGPARM_MAX
5947 || (TARGET_MMX && MMX_REGNO_P (regno)
5948 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5949 || (TARGET_SSE && SSE_REGNO_P (regno)
5950 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5953 if (TARGET_SSE && SSE_REGNO_P (regno)
5954 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
5955 return true;
5957 /* TODO: The function should depend on current function ABI but
5958 builtins.c would need updating then. Therefore we use the
5959 default ABI. */
5961 /* RAX is used as hidden argument to va_arg functions. */
5962 if (ix86_abi == SYSV_ABI && regno == AX_REG)
5963 return true;
5965 if (ix86_abi == MS_ABI)
5966 parm_regs = x86_64_ms_abi_int_parameter_registers;
5967 else
5968 parm_regs = x86_64_int_parameter_registers;
5969 for (i = 0; i < (ix86_abi == MS_ABI
5970 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5971 if (regno == parm_regs[i])
5972 return true;
5973 return false;
5976 /* Return if we do not know how to pass TYPE solely in registers. */
5978 static bool
5979 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
5981 if (must_pass_in_stack_var_size_or_pad (mode, type))
5982 return true;
5984 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5985 The layout_type routine is crafty and tries to trick us into passing
5986 currently unsupported vector types on the stack by using TImode. */
5987 return (!TARGET_64BIT && mode == TImode
5988 && type && TREE_CODE (type) != VECTOR_TYPE);
5991 /* Return the size, in bytes, of the area reserved for arguments passed
5992 in registers for the function represented by FNDECL, depending on the
5993 ABI format used. */
5994 int
5995 ix86_reg_parm_stack_space (const_tree fndecl)
5997 enum calling_abi call_abi = SYSV_ABI;
5998 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
5999 call_abi = ix86_function_abi (fndecl);
6000 else
6001 call_abi = ix86_function_type_abi (fndecl);
6002 if (TARGET_64BIT && call_abi == MS_ABI)
6003 return 32;
6004 return 0;
6007 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
6008 call abi used. */
6009 enum calling_abi
6010 ix86_function_type_abi (const_tree fntype)
6012 if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
6014 enum calling_abi abi = ix86_abi;
6015 if (abi == SYSV_ABI)
6017 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
6018 abi = MS_ABI;
6020 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
6021 abi = SYSV_ABI;
6022 return abi;
6024 return ix86_abi;
6027 /* We add this as a workaround in order to use libc_has_function
6028 hook in i386.md. */
6029 bool
6030 ix86_libc_has_function (enum function_class fn_class)
6032 return targetm.libc_has_function (fn_class);
6035 static bool
6036 ix86_function_ms_hook_prologue (const_tree fn)
6038 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
6040 if (decl_function_context (fn) != NULL_TREE)
6041 error_at (DECL_SOURCE_LOCATION (fn),
6042 "ms_hook_prologue is not compatible with nested function");
6043 else
6044 return true;
6046 return false;
6049 static enum calling_abi
6050 ix86_function_abi (const_tree fndecl)
6052 if (! fndecl)
6053 return ix86_abi;
6054 return ix86_function_type_abi (TREE_TYPE (fndecl));
6057 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
6058 call abi used. */
6059 enum calling_abi
6060 ix86_cfun_abi (void)
6062 if (! cfun)
6063 return ix86_abi;
6064 return cfun->machine->call_abi;
6067 /* Write the extra assembler code needed to declare a function properly. */
6069 void
6070 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
6071 tree decl)
6073 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
6075 if (is_ms_hook)
6077 int i, filler_count = (TARGET_64BIT ? 32 : 16);
6078 unsigned int filler_cc = 0xcccccccc;
6080 for (i = 0; i < filler_count; i += 4)
6081 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
6084 #ifdef SUBTARGET_ASM_UNWIND_INIT
6085 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
6086 #endif
6088 ASM_OUTPUT_LABEL (asm_out_file, fname);
6090 /* Output magic byte marker, if hot-patch attribute is set. */
6091 if (is_ms_hook)
6093 if (TARGET_64BIT)
6095 /* leaq [%rsp + 0], %rsp */
6096 asm_fprintf (asm_out_file, ASM_BYTE
6097 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
6099 else
6101 /* movl.s %edi, %edi
6102 push %ebp
6103 movl.s %esp, %ebp */
6104 asm_fprintf (asm_out_file, ASM_BYTE
6105 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
6110 /* regclass.c */
6111 extern void init_regs (void);
6113 /* Implementation of call abi switching target hook. Specific to FNDECL
6114 the specific call register sets are set. See also
6115 ix86_conditional_register_usage for more details. */
6116 void
6117 ix86_call_abi_override (const_tree fndecl)
6119 if (fndecl == NULL_TREE)
6120 cfun->machine->call_abi = ix86_abi;
6121 else
6122 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
6125 /* 64-bit MS and SYSV ABI have different set of call used registers. Avoid
6126 expensive re-initialization of init_regs each time we switch function context
6127 since this is needed only during RTL expansion. */
6128 static void
6129 ix86_maybe_switch_abi (void)
6131 if (TARGET_64BIT &&
6132 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
6133 reinit_regs ();
6136 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6137 for a call to a function whose data type is FNTYPE.
6138 For a library call, FNTYPE is 0. */
6140 void
6141 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
6142 tree fntype, /* tree ptr for function decl */
6143 rtx libname, /* SYMBOL_REF of library name or 0 */
6144 tree fndecl,
6145 int caller)
6147 struct cgraph_local_info *i;
6149 memset (cum, 0, sizeof (*cum));
6151 if (fndecl)
6153 i = cgraph_node::local_info (fndecl);
6154 cum->call_abi = ix86_function_abi (fndecl);
6156 else
6158 i = NULL;
6159 cum->call_abi = ix86_function_type_abi (fntype);
6162 cum->caller = caller;
6164 /* Set up the number of registers to use for passing arguments. */
6165 cum->nregs = ix86_regparm;
6166 if (TARGET_64BIT)
6168 cum->nregs = (cum->call_abi == SYSV_ABI
6169 ? X86_64_REGPARM_MAX
6170 : X86_64_MS_REGPARM_MAX);
6172 if (TARGET_SSE)
6174 cum->sse_nregs = SSE_REGPARM_MAX;
6175 if (TARGET_64BIT)
6177 cum->sse_nregs = (cum->call_abi == SYSV_ABI
6178 ? X86_64_SSE_REGPARM_MAX
6179 : X86_64_MS_SSE_REGPARM_MAX);
6182 if (TARGET_MMX)
6183 cum->mmx_nregs = MMX_REGPARM_MAX;
6184 cum->warn_avx512f = true;
6185 cum->warn_avx = true;
6186 cum->warn_sse = true;
6187 cum->warn_mmx = true;
6189 /* Because the type might mismatch between caller and callee, we need to
6190 use the actual type of the function for local calls.
6191 FIXME: cgraph_analyze can be told to actually record if function uses
6192 va_start so for local functions maybe_vaarg can be made aggressive
6193 helping K&R code.
6194 FIXME: once the type system is fixed, we won't need this code anymore. */
6195 if (i && i->local && i->can_change_signature)
6196 fntype = TREE_TYPE (fndecl);
6197 cum->maybe_vaarg = (fntype
6198 ? (!prototype_p (fntype) || stdarg_p (fntype))
6199 : !libname);
6201 if (!TARGET_64BIT)
6203 /* If there are variable arguments, then we won't pass anything
6204 in registers in 32-bit mode. */
6205 if (stdarg_p (fntype))
6207 cum->nregs = 0;
6208 cum->sse_nregs = 0;
6209 cum->mmx_nregs = 0;
6210 cum->warn_avx512f = false;
6211 cum->warn_avx = false;
6212 cum->warn_sse = false;
6213 cum->warn_mmx = false;
6214 return;
6217 /* Use ecx and edx registers if function has fastcall attribute,
6218 else look for regparm information. */
6219 if (fntype)
6221 unsigned int ccvt = ix86_get_callcvt (fntype);
6222 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
6224 cum->nregs = 1;
6225 cum->fastcall = 1; /* Same first register as in fastcall. */
6227 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
6229 cum->nregs = 2;
6230 cum->fastcall = 1;
6232 else
6233 cum->nregs = ix86_function_regparm (fntype, fndecl);
6236 /* Set up the number of SSE registers used for passing SFmode
6237 and DFmode arguments. Warn for mismatching ABI. */
6238 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
6242 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
6243 But in the case of vector types, it is some vector mode.
6245 When we have only some of our vector isa extensions enabled, then there
6246 are some modes for which vector_mode_supported_p is false. For these
6247 modes, the generic vector support in gcc will choose some non-vector mode
6248 in order to implement the type. By computing the natural mode, we'll
6249 select the proper ABI location for the operand and not depend on whatever
6250 the middle-end decides to do with these vector types.
6252 The middle-end can't deal with vector types > 16 bytes. In this
6253 case, we return the original mode and warn ABI change if CUM isn't
6254 NULL.
6256 If IN_RETURN is true, warn about the ABI change if the vector mode isn't
6257 available for function return value. */
6259 static enum machine_mode
6260 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
6261 bool in_return)
6263 enum machine_mode mode = TYPE_MODE (type);
6265 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
6267 HOST_WIDE_INT size = int_size_in_bytes (type);
6268 if ((size == 8 || size == 16 || size == 32 || size == 64)
6269 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
6270 && TYPE_VECTOR_SUBPARTS (type) > 1)
6272 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
6274 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6275 mode = MIN_MODE_VECTOR_FLOAT;
6276 else
6277 mode = MIN_MODE_VECTOR_INT;
6279 /* Get the mode which has this inner mode and number of units. */
6280 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
6281 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
6282 && GET_MODE_INNER (mode) == innermode)
6284 if (size == 64 && !TARGET_AVX512F)
6286 static bool warnedavx512f;
6287 static bool warnedavx512f_ret;
6289 if (cum && cum->warn_avx512f && !warnedavx512f)
6291 if (warning (OPT_Wpsabi, "AVX512F vector argument "
6292 "without AVX512F enabled changes the ABI"))
6293 warnedavx512f = true;
6295 else if (in_return && !warnedavx512f_ret)
6297 if (warning (OPT_Wpsabi, "AVX512F vector return "
6298 "without AVX512F enabled changes the ABI"))
6299 warnedavx512f_ret = true;
6302 return TYPE_MODE (type);
6304 else if (size == 32 && !TARGET_AVX)
6306 static bool warnedavx;
6307 static bool warnedavx_ret;
6309 if (cum && cum->warn_avx && !warnedavx)
6311 if (warning (OPT_Wpsabi, "AVX vector argument "
6312 "without AVX enabled changes the ABI"))
6313 warnedavx = true;
6315 else if (in_return && !warnedavx_ret)
6317 if (warning (OPT_Wpsabi, "AVX vector return "
6318 "without AVX enabled changes the ABI"))
6319 warnedavx_ret = true;
6322 return TYPE_MODE (type);
6324 else if (((size == 8 && TARGET_64BIT) || size == 16)
6325 && !TARGET_SSE)
6327 static bool warnedsse;
6328 static bool warnedsse_ret;
6330 if (cum && cum->warn_sse && !warnedsse)
6332 if (warning (OPT_Wpsabi, "SSE vector argument "
6333 "without SSE enabled changes the ABI"))
6334 warnedsse = true;
6336 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
6338 if (warning (OPT_Wpsabi, "SSE vector return "
6339 "without SSE enabled changes the ABI"))
6340 warnedsse_ret = true;
6343 else if ((size == 8 && !TARGET_64BIT) && !TARGET_MMX)
6345 static bool warnedmmx;
6346 static bool warnedmmx_ret;
6348 if (cum && cum->warn_mmx && !warnedmmx)
6350 if (warning (OPT_Wpsabi, "MMX vector argument "
6351 "without MMX enabled changes the ABI"))
6352 warnedmmx = true;
6354 else if (in_return && !warnedmmx_ret)
6356 if (warning (OPT_Wpsabi, "MMX vector return "
6357 "without MMX enabled changes the ABI"))
6358 warnedmmx_ret = true;
6361 return mode;
6364 gcc_unreachable ();
6368 return mode;
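/* Illustrative, hypothetical example: for the generic vector type

     typedef float v4sf __attribute__((vector_size (16)));

   the natural mode computed above is V4SFmode. When the required ISA is
   missing, the function still reports the vector mode for argument-passing
   purposes after the -Wpsabi warning, except in the oversized AVX/AVX512F
   cases where the original TYPE_MODE is kept.  */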
6371 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
6372 this may not agree with the mode that the type system has chosen for the
6373 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
6374 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
6376 static rtx
6377 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
6378 unsigned int regno)
6380 rtx tmp;
6382 if (orig_mode != BLKmode)
6383 tmp = gen_rtx_REG (orig_mode, regno);
6384 else
6386 tmp = gen_rtx_REG (mode, regno);
6387 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
6388 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
6391 return tmp;
6394 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
6395 of this code is to classify each 8bytes of incoming argument by the register
6396 class and assign registers accordingly. */
6398 /* Return the union class of CLASS1 and CLASS2.
6399 See the x86-64 PS ABI for details. */
6401 static enum x86_64_reg_class
6402 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
6404 /* Rule #1: If both classes are equal, this is the resulting class. */
6405 if (class1 == class2)
6406 return class1;
6408 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
6409 the other class. */
6410 if (class1 == X86_64_NO_CLASS)
6411 return class2;
6412 if (class2 == X86_64_NO_CLASS)
6413 return class1;
6415 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
6416 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
6417 return X86_64_MEMORY_CLASS;
6419 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
6420 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
6421 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
6422 return X86_64_INTEGERSI_CLASS;
6423 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
6424 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
6425 return X86_64_INTEGER_CLASS;
6427 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
6428 MEMORY is used. */
6429 if (class1 == X86_64_X87_CLASS
6430 || class1 == X86_64_X87UP_CLASS
6431 || class1 == X86_64_COMPLEX_X87_CLASS
6432 || class2 == X86_64_X87_CLASS
6433 || class2 == X86_64_X87UP_CLASS
6434 || class2 == X86_64_COMPLEX_X87_CLASS)
6435 return X86_64_MEMORY_CLASS;
6437 /* Rule #6: Otherwise class SSE is used. */
6438 return X86_64_SSE_CLASS;
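/* For example, merging INTEGER with SSE yields INTEGER (rule #4), merging
   SSE with SSEUP yields SSE (rule #6), and merging X87 with SSE yields
   MEMORY (rule #5).  */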
6441 /* Classify the argument of type TYPE and mode MODE.
6442 CLASSES will be filled by the register class used to pass each word
6443 of the operand. The number of words is returned. In case the parameter
6444 should be passed in memory, 0 is returned. As a special case for zero
6445 sized containers, classes[0] will be NO_CLASS and 1 is returned.
6447 BIT_OFFSET is used internally for handling records and specifies the offset
6448 in bits modulo 512 to avoid overflow cases.
6450 See the x86-64 PS ABI for details.
6453 static int
6454 classify_argument (enum machine_mode mode, const_tree type,
6455 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
6457 HOST_WIDE_INT bytes =
6458 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6459 int words
6460 = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6462 /* Variable sized entities are always passed/returned in memory. */
6463 if (bytes < 0)
6464 return 0;
6466 if (mode != VOIDmode
6467 && targetm.calls.must_pass_in_stack (mode, type))
6468 return 0;
6470 /* Special case check for pointer to shared, on 64-bit target. */
6471 if (TARGET_64BIT && mode == TImode
6472 && type && TREE_CODE (type) == POINTER_TYPE
6473 && upc_shared_type_p (TREE_TYPE (type)))
6475 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6476 return 2;
6479 if (type && AGGREGATE_TYPE_P (type))
6481 int i;
6482 tree field;
6483 enum x86_64_reg_class subclasses[MAX_CLASSES];
6485 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
6486 if (bytes > 64)
6487 return 0;
6489 for (i = 0; i < words; i++)
6490 classes[i] = X86_64_NO_CLASS;
6492 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
6493 signal the memory class, so handle it as a special case. */
6494 if (!words)
6496 classes[0] = X86_64_NO_CLASS;
6497 return 1;
6500 /* Classify each field of record and merge classes. */
6501 switch (TREE_CODE (type))
6503 case RECORD_TYPE:
6504 /* And now merge the fields of structure. */
6505 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6507 if (TREE_CODE (field) == FIELD_DECL)
6509 int num;
6511 if (TREE_TYPE (field) == error_mark_node)
6512 continue;
6514 /* Bitfields are always classified as integer. Handle them
6515 early, since later code would consider them to be
6516 misaligned integers. */
6517 if (DECL_BIT_FIELD (field))
6519 for (i = (int_bit_position (field)
6520 + (bit_offset % 64)) / 8 / 8;
6521 i < ((int_bit_position (field) + (bit_offset % 64))
6522 + tree_to_shwi (DECL_SIZE (field))
6523 + 63) / 8 / 8; i++)
6524 classes[i] =
6525 merge_classes (X86_64_INTEGER_CLASS,
6526 classes[i]);
6528 else
6530 int pos;
6532 type = TREE_TYPE (field);
6534 /* Flexible array member is ignored. */
6535 if (TYPE_MODE (type) == BLKmode
6536 && TREE_CODE (type) == ARRAY_TYPE
6537 && TYPE_SIZE (type) == NULL_TREE
6538 && TYPE_DOMAIN (type) != NULL_TREE
6539 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
6540 == NULL_TREE))
6542 static bool warned;
6544 if (!warned && warn_psabi)
6546 warned = true;
6547 inform (input_location,
6548 "the ABI of passing struct with"
6549 " a flexible array member has"
6550 " changed in GCC 4.4");
6552 continue;
6554 num = classify_argument (TYPE_MODE (type), type,
6555 subclasses,
6556 (int_bit_position (field)
6557 + bit_offset) % 512);
6558 if (!num)
6559 return 0;
6560 pos = (int_bit_position (field)
6561 + (bit_offset % 64)) / 8 / 8;
6562 for (i = 0; i < num && (i + pos) < words; i++)
6563 classes[i + pos] =
6564 merge_classes (subclasses[i], classes[i + pos]);
6568 break;
6570 case ARRAY_TYPE:
6571 /* Arrays are handled as small records. */
6573 int num;
6574 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
6575 TREE_TYPE (type), subclasses, bit_offset);
6576 if (!num)
6577 return 0;
6579 /* The partial classes are now full classes. */
6580 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
6581 subclasses[0] = X86_64_SSE_CLASS;
6582 if (subclasses[0] == X86_64_INTEGERSI_CLASS
6583 && !((bit_offset % 64) == 0 && bytes == 4))
6584 subclasses[0] = X86_64_INTEGER_CLASS;
6586 for (i = 0; i < words; i++)
6587 classes[i] = subclasses[i % num];
6589 break;
6591 case UNION_TYPE:
6592 case QUAL_UNION_TYPE:
6593 /* Unions are similar to RECORD_TYPE but the offset is always 0.
6595 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6597 if (TREE_CODE (field) == FIELD_DECL)
6599 int num;
6601 if (TREE_TYPE (field) == error_mark_node)
6602 continue;
6604 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
6605 TREE_TYPE (field), subclasses,
6606 bit_offset);
6607 if (!num)
6608 return 0;
6609 for (i = 0; i < num && i < words; i++)
6610 classes[i] = merge_classes (subclasses[i], classes[i]);
6613 break;
6615 default:
6616 gcc_unreachable ();
6619 if (words > 2)
6621 /* When size > 16 bytes, if the first class isn't
6622 X86_64_SSE_CLASS or any of the remaining classes isn't
6623 X86_64_SSEUP_CLASS, everything should be passed in
6624 memory. */
6625 if (classes[0] != X86_64_SSE_CLASS)
6626 return 0;
6628 for (i = 1; i < words; i++)
6629 if (classes[i] != X86_64_SSEUP_CLASS)
6630 return 0;
6633 /* Final merger cleanup. */
6634 for (i = 0; i < words; i++)
6636 /* If one class is MEMORY, everything should be passed in
6637 memory. */
6638 if (classes[i] == X86_64_MEMORY_CLASS)
6639 return 0;
6641 /* X86_64_SSEUP_CLASS should always be preceded by
6642 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6643 if (classes[i] == X86_64_SSEUP_CLASS
6644 && classes[i - 1] != X86_64_SSE_CLASS
6645 && classes[i - 1] != X86_64_SSEUP_CLASS)
6647 /* The first one should never be X86_64_SSEUP_CLASS. */
6648 gcc_assert (i != 0);
6649 classes[i] = X86_64_SSE_CLASS;
6652 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6653 everything should be passed in memory. */
6654 if (classes[i] == X86_64_X87UP_CLASS
6655 && (classes[i - 1] != X86_64_X87_CLASS))
6657 static bool warned;
6659 /* The first one should never be X86_64_X87UP_CLASS. */
6660 gcc_assert (i != 0);
6661 if (!warned && warn_psabi)
6663 warned = true;
6664 inform (input_location,
6665 "the ABI of passing union with long double"
6666 " has changed in GCC 4.4");
6668 return 0;
6671 return words;
6674 /* Compute the alignment needed. We align all types to their natural
6675 boundaries, with the exception of XFmode, which is aligned to 64 bits. */
6676 if (mode != VOIDmode && mode != BLKmode)
6678 int mode_alignment = GET_MODE_BITSIZE (mode);
6680 if (mode == XFmode)
6681 mode_alignment = 128;
6682 else if (mode == XCmode)
6683 mode_alignment = 256;
6684 if (COMPLEX_MODE_P (mode))
6685 mode_alignment /= 2;
6686 /* Misaligned fields are always returned in memory. */
6687 if (bit_offset % mode_alignment)
6688 return 0;
6691 /* For V1xx modes, just use the base mode. */
6692 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
6693 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
6694 mode = GET_MODE_INNER (mode);
6696 /* Classification of atomic types. */
6697 switch (mode)
6699 case SDmode:
6700 case DDmode:
6701 classes[0] = X86_64_SSE_CLASS;
6702 return 1;
6703 case TDmode:
6704 classes[0] = X86_64_SSE_CLASS;
6705 classes[1] = X86_64_SSEUP_CLASS;
6706 return 2;
6707 case DImode:
6708 case SImode:
6709 case HImode:
6710 case QImode:
6711 case CSImode:
6712 case CHImode:
6713 case CQImode:
6715 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
6717 /* Analyze last 128 bits only. */
6718 size = (size - 1) & 0x7f;
6720 if (size < 32)
6722 classes[0] = X86_64_INTEGERSI_CLASS;
6723 return 1;
6725 else if (size < 64)
6727 classes[0] = X86_64_INTEGER_CLASS;
6728 return 1;
6730 else if (size < 64+32)
6732 classes[0] = X86_64_INTEGER_CLASS;
6733 classes[1] = X86_64_INTEGERSI_CLASS;
6734 return 2;
6736 else if (size < 64+64)
6738 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6739 return 2;
6741 else
6742 gcc_unreachable ();
6744 case CDImode:
6745 case TImode:
6746 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6747 return 2;
6748 case COImode:
6749 case OImode:
6750 /* OImode shouldn't be used directly. */
6751 gcc_unreachable ();
6752 case CTImode:
6753 return 0;
6754 case SFmode:
6755 if (!(bit_offset % 64))
6756 classes[0] = X86_64_SSESF_CLASS;
6757 else
6758 classes[0] = X86_64_SSE_CLASS;
6759 return 1;
6760 case DFmode:
6761 classes[0] = X86_64_SSEDF_CLASS;
6762 return 1;
6763 case XFmode:
6764 classes[0] = X86_64_X87_CLASS;
6765 classes[1] = X86_64_X87UP_CLASS;
6766 return 2;
6767 case TFmode:
6768 classes[0] = X86_64_SSE_CLASS;
6769 classes[1] = X86_64_SSEUP_CLASS;
6770 return 2;
6771 case SCmode:
6772 classes[0] = X86_64_SSE_CLASS;
6773 if (!(bit_offset % 64))
6774 return 1;
6775 else
6777 static bool warned;
6779 if (!warned && warn_psabi)
6781 warned = true;
6782 inform (input_location,
6783 "the ABI of passing structure with complex float"
6784 " member has changed in GCC 4.4");
6786 classes[1] = X86_64_SSESF_CLASS;
6787 return 2;
6789 case DCmode:
6790 classes[0] = X86_64_SSEDF_CLASS;
6791 classes[1] = X86_64_SSEDF_CLASS;
6792 return 2;
6793 case XCmode:
6794 classes[0] = X86_64_COMPLEX_X87_CLASS;
6795 return 1;
6796 case TCmode:
6797 /* This mode is larger than 16 bytes. */
6798 return 0;
6799 case V8SFmode:
6800 case V8SImode:
6801 case V32QImode:
6802 case V16HImode:
6803 case V4DFmode:
6804 case V4DImode:
6805 classes[0] = X86_64_SSE_CLASS;
6806 classes[1] = X86_64_SSEUP_CLASS;
6807 classes[2] = X86_64_SSEUP_CLASS;
6808 classes[3] = X86_64_SSEUP_CLASS;
6809 return 4;
6810 case V8DFmode:
6811 case V16SFmode:
6812 case V8DImode:
6813 case V16SImode:
6814 case V32HImode:
6815 case V64QImode:
6816 classes[0] = X86_64_SSE_CLASS;
6817 classes[1] = X86_64_SSEUP_CLASS;
6818 classes[2] = X86_64_SSEUP_CLASS;
6819 classes[3] = X86_64_SSEUP_CLASS;
6820 classes[4] = X86_64_SSEUP_CLASS;
6821 classes[5] = X86_64_SSEUP_CLASS;
6822 classes[6] = X86_64_SSEUP_CLASS;
6823 classes[7] = X86_64_SSEUP_CLASS;
6824 return 8;
6825 case V4SFmode:
6826 case V4SImode:
6827 case V16QImode:
6828 case V8HImode:
6829 case V2DFmode:
6830 case V2DImode:
6831 classes[0] = X86_64_SSE_CLASS;
6832 classes[1] = X86_64_SSEUP_CLASS;
6833 return 2;
6834 case V1TImode:
6835 case V1DImode:
6836 case V2SFmode:
6837 case V2SImode:
6838 case V4HImode:
6839 case V8QImode:
6840 classes[0] = X86_64_SSE_CLASS;
6841 return 1;
6842 case BLKmode:
6843 case VOIDmode:
6844 return 0;
6845 default:
6846 gcc_assert (VECTOR_MODE_P (mode));
6848 if (bytes > 16)
6849 return 0;
6851 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
6853 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
6854 classes[0] = X86_64_INTEGERSI_CLASS;
6855 else
6856 classes[0] = X86_64_INTEGER_CLASS;
6857 classes[1] = X86_64_INTEGER_CLASS;
6858 return 1 + (bytes > 8);
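/* A few examples of the classification this produces (at bit offset 0):
     int                          -> 1 word:  INTEGERSI
     double                       -> 1 word:  SSEDF
     __int128                     -> 2 words: INTEGER, INTEGER
     struct { double d; long l; } -> 2 words: SSEDF, INTEGER
     __m256                       -> 4 words: SSE, SSEUP, SSEUP, SSEUP  */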
6862 /* Examine the argument and set the number of registers required in each
6863 class. Return true iff the parameter should be passed in memory. */
6865 static bool
6866 examine_argument (enum machine_mode mode, const_tree type, int in_return,
6867 int *int_nregs, int *sse_nregs)
6869 enum x86_64_reg_class regclass[MAX_CLASSES];
6870 int n = classify_argument (mode, type, regclass, 0);
6872 *int_nregs = 0;
6873 *sse_nregs = 0;
6875 if (!n)
6876 return true;
6877 for (n--; n >= 0; n--)
6878 switch (regclass[n])
6880 case X86_64_INTEGER_CLASS:
6881 case X86_64_INTEGERSI_CLASS:
6882 (*int_nregs)++;
6883 break;
6884 case X86_64_SSE_CLASS:
6885 case X86_64_SSESF_CLASS:
6886 case X86_64_SSEDF_CLASS:
6887 (*sse_nregs)++;
6888 break;
6889 case X86_64_NO_CLASS:
6890 case X86_64_SSEUP_CLASS:
6891 break;
6892 case X86_64_X87_CLASS:
6893 case X86_64_X87UP_CLASS:
6894 case X86_64_COMPLEX_X87_CLASS:
6895 if (!in_return)
6896 return true;
6897 break;
6898 case X86_64_MEMORY_CLASS:
6899 gcc_unreachable ();
6902 return false;
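/* E.g. for struct { double d; long l; } above this sets *int_nregs = 1 and
   *sse_nregs = 1 and returns false, so the struct is passed in registers
   when enough registers of both kinds remain.  */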
6905 /* Construct container for the argument used by GCC interface. See
6906 FUNCTION_ARG for the detailed description. */
6908 static rtx
6909 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
6910 const_tree type, int in_return, int nintregs, int nsseregs,
6911 const int *intreg, int sse_regno)
6913 /* The following variables hold the static issued_error state. */
6914 static bool issued_sse_arg_error;
6915 static bool issued_sse_ret_error;
6916 static bool issued_x87_ret_error;
6918 enum machine_mode tmpmode;
6919 int bytes =
6920 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6921 enum x86_64_reg_class regclass[MAX_CLASSES];
6922 int n;
6923 int i;
6924 int nexps = 0;
6925 int needed_sseregs, needed_intregs;
6926 rtx exp[MAX_CLASSES];
6927 rtx ret;
6929 n = classify_argument (mode, type, regclass, 0);
6930 if (!n)
6931 return NULL;
6932 if (examine_argument (mode, type, in_return, &needed_intregs,
6933 &needed_sseregs))
6934 return NULL;
6935 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
6936 return NULL;
6938 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6939 some less clueful developer tries to use floating-point anyway. */
6940 if (needed_sseregs && !TARGET_SSE)
6942 if (in_return)
6944 if (!issued_sse_ret_error)
6946 error ("SSE register return with SSE disabled");
6947 issued_sse_ret_error = true;
6950 else if (!issued_sse_arg_error)
6952 error ("SSE register argument with SSE disabled");
6953 issued_sse_arg_error = true;
6955 return NULL;
6958 /* Likewise, error if the ABI requires us to return values in the
6959 x87 registers and the user specified -mno-80387. */
6960 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
6961 for (i = 0; i < n; i++)
6962 if (regclass[i] == X86_64_X87_CLASS
6963 || regclass[i] == X86_64_X87UP_CLASS
6964 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
6966 if (!issued_x87_ret_error)
6968 error ("x87 register return with x87 disabled");
6969 issued_x87_ret_error = true;
6971 return NULL;
6974 /* First construct the simple cases. Avoid SCmode, since we want to use
6975 a single register to pass this type. */
6976 if (n == 1 && mode != SCmode)
6977 switch (regclass[0])
6979 case X86_64_INTEGER_CLASS:
6980 case X86_64_INTEGERSI_CLASS:
6981 return gen_rtx_REG (mode, intreg[0]);
6982 case X86_64_SSE_CLASS:
6983 case X86_64_SSESF_CLASS:
6984 case X86_64_SSEDF_CLASS:
6985 if (mode != BLKmode)
6986 return gen_reg_or_parallel (mode, orig_mode,
6987 SSE_REGNO (sse_regno));
6988 break;
6989 case X86_64_X87_CLASS:
6990 case X86_64_COMPLEX_X87_CLASS:
6991 return gen_rtx_REG (mode, FIRST_STACK_REG);
6992 case X86_64_NO_CLASS:
6993 /* Zero sized array, struct or class. */
6994 return NULL;
6995 default:
6996 gcc_unreachable ();
6998 if (n == 2
6999 && regclass[0] == X86_64_SSE_CLASS
7000 && regclass[1] == X86_64_SSEUP_CLASS
7001 && mode != BLKmode)
7002 return gen_reg_or_parallel (mode, orig_mode,
7003 SSE_REGNO (sse_regno));
7004 if (n == 4
7005 && regclass[0] == X86_64_SSE_CLASS
7006 && regclass[1] == X86_64_SSEUP_CLASS
7007 && regclass[2] == X86_64_SSEUP_CLASS
7008 && regclass[3] == X86_64_SSEUP_CLASS
7009 && mode != BLKmode)
7010 return gen_reg_or_parallel (mode, orig_mode,
7011 SSE_REGNO (sse_regno));
7012 if (n == 8
7013 && regclass[0] == X86_64_SSE_CLASS
7014 && regclass[1] == X86_64_SSEUP_CLASS
7015 && regclass[2] == X86_64_SSEUP_CLASS
7016 && regclass[3] == X86_64_SSEUP_CLASS
7017 && regclass[4] == X86_64_SSEUP_CLASS
7018 && regclass[5] == X86_64_SSEUP_CLASS
7019 && regclass[6] == X86_64_SSEUP_CLASS
7020 && regclass[7] == X86_64_SSEUP_CLASS
7021 && mode != BLKmode)
7022 return gen_reg_or_parallel (mode, orig_mode,
7023 SSE_REGNO (sse_regno));
7024 if (n == 2
7025 && regclass[0] == X86_64_X87_CLASS
7026 && regclass[1] == X86_64_X87UP_CLASS)
7027 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
7029 if (n == 2
7030 && regclass[0] == X86_64_INTEGER_CLASS
7031 && regclass[1] == X86_64_INTEGER_CLASS
7032 && (mode == CDImode || mode == TImode)
7033 && intreg[0] + 1 == intreg[1])
7034 return gen_rtx_REG (mode, intreg[0]);
7036 /* Otherwise figure out the entries of the PARALLEL. */
7037 for (i = 0; i < n; i++)
7039 int pos;
7041 switch (regclass[i])
7043 case X86_64_NO_CLASS:
7044 break;
7045 case X86_64_INTEGER_CLASS:
7046 case X86_64_INTEGERSI_CLASS:
7047 /* Merge TImodes on aligned occasions here too. */
7048 if (i * 8 + 8 > bytes)
7049 tmpmode
7050 = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
7051 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
7052 tmpmode = SImode;
7053 else
7054 tmpmode = DImode;
7055 /* We've requested 24 bytes for which we
7056 don't have a mode. Use DImode. */
7057 if (tmpmode == BLKmode)
7058 tmpmode = DImode;
7059 exp [nexps++]
7060 = gen_rtx_EXPR_LIST (VOIDmode,
7061 gen_rtx_REG (tmpmode, *intreg),
7062 GEN_INT (i*8));
7063 intreg++;
7064 break;
7065 case X86_64_SSESF_CLASS:
7066 exp [nexps++]
7067 = gen_rtx_EXPR_LIST (VOIDmode,
7068 gen_rtx_REG (SFmode,
7069 SSE_REGNO (sse_regno)),
7070 GEN_INT (i*8));
7071 sse_regno++;
7072 break;
7073 case X86_64_SSEDF_CLASS:
7074 exp [nexps++]
7075 = gen_rtx_EXPR_LIST (VOIDmode,
7076 gen_rtx_REG (DFmode,
7077 SSE_REGNO (sse_regno)),
7078 GEN_INT (i*8));
7079 sse_regno++;
7080 break;
7081 case X86_64_SSE_CLASS:
7082 pos = i;
7083 switch (n)
7085 case 1:
7086 tmpmode = DImode;
7087 break;
7088 case 2:
7089 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
7091 tmpmode = TImode;
7092 i++;
7094 else
7095 tmpmode = DImode;
7096 break;
7097 case 4:
7098 gcc_assert (i == 0
7099 && regclass[1] == X86_64_SSEUP_CLASS
7100 && regclass[2] == X86_64_SSEUP_CLASS
7101 && regclass[3] == X86_64_SSEUP_CLASS);
7102 tmpmode = OImode;
7103 i += 3;
7104 break;
7105 case 8:
7106 gcc_assert (i == 0
7107 && regclass[1] == X86_64_SSEUP_CLASS
7108 && regclass[2] == X86_64_SSEUP_CLASS
7109 && regclass[3] == X86_64_SSEUP_CLASS
7110 && regclass[4] == X86_64_SSEUP_CLASS
7111 && regclass[5] == X86_64_SSEUP_CLASS
7112 && regclass[6] == X86_64_SSEUP_CLASS
7113 && regclass[7] == X86_64_SSEUP_CLASS);
7114 tmpmode = XImode;
7115 i += 7;
7116 break;
7117 default:
7118 gcc_unreachable ();
7120 exp [nexps++]
7121 = gen_rtx_EXPR_LIST (VOIDmode,
7122 gen_rtx_REG (tmpmode,
7123 SSE_REGNO (sse_regno)),
7124 GEN_INT (pos*8));
7125 sse_regno++;
7126 break;
7127 default:
7128 gcc_unreachable ();
7132 /* Empty aligned struct, union or class. */
7133 if (nexps == 0)
7134 return NULL;
7136 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
7137 for (i = 0; i < nexps; i++)
7138 XVECEXP (ret, 0, i) = exp [i];
7139 return ret;
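/* For struct { double d; long l; } this builds a PARALLEL with two
   EXPR_LISTs: a DFmode SSE register at offset 0 and a DImode integer
   register at offset 8, drawn from SSE_REGNO and INTREG respectively.  */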
7142 /* Update the data in CUM to advance over an argument of mode MODE
7143 and data type TYPE. (TYPE is null for libcalls where that information
7144 may not be available.) */
7146 static void
7147 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7148 const_tree type, HOST_WIDE_INT bytes,
7149 HOST_WIDE_INT words)
7151 switch (mode)
7153 default:
7154 break;
7156 case BLKmode:
7157 if (bytes < 0)
7158 break;
7159 /* FALLTHRU */
7161 case DImode:
7162 case SImode:
7163 case HImode:
7164 case QImode:
7165 cum->words += words;
7166 cum->nregs -= words;
7167 cum->regno += words;
7169 if (cum->nregs <= 0)
7171 cum->nregs = 0;
7172 cum->regno = 0;
7174 break;
7176 case OImode:
7177 /* OImode shouldn't be used directly. */
7178 gcc_unreachable ();
7180 case DFmode:
7181 if (cum->float_in_sse < 2)
7182 break;
7183 case SFmode:
7184 if (cum->float_in_sse < 1)
7185 break;
7186 /* FALLTHRU */
7188 case V8SFmode:
7189 case V8SImode:
7190 case V64QImode:
7191 case V32HImode:
7192 case V16SImode:
7193 case V8DImode:
7194 case V16SFmode:
7195 case V8DFmode:
7196 case V32QImode:
7197 case V16HImode:
7198 case V4DFmode:
7199 case V4DImode:
7200 case TImode:
7201 case V16QImode:
7202 case V8HImode:
7203 case V4SImode:
7204 case V2DImode:
7205 case V4SFmode:
7206 case V2DFmode:
7207 if (!type || !AGGREGATE_TYPE_P (type))
7209 cum->sse_words += words;
7210 cum->sse_nregs -= 1;
7211 cum->sse_regno += 1;
7212 if (cum->sse_nregs <= 0)
7214 cum->sse_nregs = 0;
7215 cum->sse_regno = 0;
7218 break;
7220 case V8QImode:
7221 case V4HImode:
7222 case V2SImode:
7223 case V2SFmode:
7224 case V1TImode:
7225 case V1DImode:
7226 if (!type || !AGGREGATE_TYPE_P (type))
7228 cum->mmx_words += words;
7229 cum->mmx_nregs -= 1;
7230 cum->mmx_regno += 1;
7231 if (cum->mmx_nregs <= 0)
7233 cum->mmx_nregs = 0;
7234 cum->mmx_regno = 0;
7237 break;
7241 static void
7242 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7243 const_tree type, HOST_WIDE_INT words, bool named)
7245 int int_nregs, sse_nregs;
7247 /* Unnamed 256-bit and 512-bit vector mode parameters are passed on the stack. */
7248 if (!named && (VALID_AVX512F_REG_MODE (mode)
7249 || VALID_AVX256_REG_MODE (mode)))
7250 return;
7252 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
7253 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
7255 cum->nregs -= int_nregs;
7256 cum->sse_nregs -= sse_nregs;
7257 cum->regno += int_nregs;
7258 cum->sse_regno += sse_nregs;
7260 else
7262 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
7263 cum->words = (cum->words + align - 1) & ~(align - 1);
7264 cum->words += words;
7268 static void
7269 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
7270 HOST_WIDE_INT words)
7272 /* Otherwise, this should be passed indirectly. */
7273 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
7275 cum->words += words;
7276 if (cum->nregs > 0)
7278 cum->nregs -= 1;
7279 cum->regno += 1;
7283 /* Update the data in CUM to advance over an argument of mode MODE and
7284 data type TYPE. (TYPE is null for libcalls where that information
7285 may not be available.) */
7287 static void
7288 ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
7289 const_tree type, bool named)
7291 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7292 HOST_WIDE_INT bytes, words;
7294 if (mode == BLKmode)
7295 bytes = int_size_in_bytes (type);
7296 else
7297 bytes = GET_MODE_SIZE (mode);
7298 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7300 if (type)
7301 mode = type_natural_mode (type, NULL, false);
7303 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7304 function_arg_advance_ms_64 (cum, bytes, words);
7305 else if (TARGET_64BIT)
7306 function_arg_advance_64 (cum, mode, type, words, named);
7307 else
7308 function_arg_advance_32 (cum, mode, type, bytes, words);
7311 /* Define where to put the arguments to a function.
7312 Value is zero to push the argument on the stack,
7313 or a hard register in which to store the argument.
7315 MODE is the argument's machine mode.
7316 TYPE is the data type of the argument (as a tree).
7317 This is null for libcalls where that information may
7318 not be available.
7319 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7320 the preceding args and about the function being called.
7321 NAMED is nonzero if this argument is a named parameter
7322 (otherwise it is an extra parameter matching an ellipsis). */
7324 static rtx
7325 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7326 enum machine_mode orig_mode, const_tree type,
7327 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
7329 /* Avoid the AL settings for the Unix64 ABI. */
7330 if (mode == VOIDmode)
7331 return constm1_rtx;
7333 switch (mode)
7335 default:
7336 break;
7338 case BLKmode:
7339 if (bytes < 0)
7340 break;
7341 /* FALLTHRU */
7342 case DImode:
7343 case SImode:
7344 case HImode:
7345 case QImode:
7346 if (words <= cum->nregs)
7348 int regno = cum->regno;
7350 /* Fastcall allocates the first two DWORD (SImode) or
7351 smaller arguments in ECX and EDX if the argument isn't
7352 an aggregate type. */
7353 if (cum->fastcall)
7355 if (mode == BLKmode
7356 || mode == DImode
7357 || (type && AGGREGATE_TYPE_P (type)))
7358 break;
7360 /* ECX, not EAX, is the first allocated register. */
7361 if (regno == AX_REG)
7362 regno = CX_REG;
7364 return gen_rtx_REG (mode, regno);
7366 break;
7368 case DFmode:
7369 if (cum->float_in_sse < 2)
7370 break;
7371 case SFmode:
7372 if (cum->float_in_sse < 1)
7373 break;
7374 /* FALLTHRU */
7375 case TImode:
7376 /* In 32bit, we pass TImode in xmm registers. */
7377 case V16QImode:
7378 case V8HImode:
7379 case V4SImode:
7380 case V2DImode:
7381 case V4SFmode:
7382 case V2DFmode:
7383 if (!type || !AGGREGATE_TYPE_P (type))
7385 if (cum->sse_nregs)
7386 return gen_reg_or_parallel (mode, orig_mode,
7387 cum->sse_regno + FIRST_SSE_REG);
7389 break;
7391 case OImode:
7392 case XImode:
7393 /* OImode and XImode shouldn't be used directly. */
7394 gcc_unreachable ();
7396 case V64QImode:
7397 case V32HImode:
7398 case V16SImode:
7399 case V8DImode:
7400 case V16SFmode:
7401 case V8DFmode:
7402 case V8SFmode:
7403 case V8SImode:
7404 case V32QImode:
7405 case V16HImode:
7406 case V4DFmode:
7407 case V4DImode:
7408 if (!type || !AGGREGATE_TYPE_P (type))
7410 if (cum->sse_nregs)
7411 return gen_reg_or_parallel (mode, orig_mode,
7412 cum->sse_regno + FIRST_SSE_REG);
7414 break;
7416 case V8QImode:
7417 case V4HImode:
7418 case V2SImode:
7419 case V2SFmode:
7420 case V1TImode:
7421 case V1DImode:
7422 if (!type || !AGGREGATE_TYPE_P (type))
7424 if (cum->mmx_nregs)
7425 return gen_reg_or_parallel (mode, orig_mode,
7426 cum->mmx_regno + FIRST_MMX_REG);
7428 break;
7431 return NULL_RTX;
7434 static rtx
7435 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7436 enum machine_mode orig_mode, const_tree type, bool named)
7438 /* Handle a hidden AL argument containing number of registers
7439 for varargs x86-64 functions. */
7440 if (mode == VOIDmode)
7441 return GEN_INT (cum->maybe_vaarg
7442 ? (cum->sse_nregs < 0
7443 ? X86_64_SSE_REGPARM_MAX
7444 : cum->sse_regno)
7445 : -1);
7447 switch (mode)
7449 default:
7450 break;
7452 case V8SFmode:
7453 case V8SImode:
7454 case V32QImode:
7455 case V16HImode:
7456 case V4DFmode:
7457 case V4DImode:
7458 case V16SFmode:
7459 case V16SImode:
7460 case V64QImode:
7461 case V32HImode:
7462 case V8DFmode:
7463 case V8DImode:
7464 /* Unnamed 256-bit and 512-bit vector mode parameters are passed on the stack. */
7465 if (!named)
7466 return NULL;
7467 break;
7470 return construct_container (mode, orig_mode, type, 0, cum->nregs,
7471 cum->sse_nregs,
7472 &x86_64_int_parameter_registers [cum->regno],
7473 cum->sse_regno);
7476 static rtx
7477 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7478 enum machine_mode orig_mode, bool named,
7479 HOST_WIDE_INT bytes)
7481 unsigned int regno;
7483 /* We need to add a clobber for MS_ABI -> SYSV ABI calls in expand_call.
7484 We use the value -2 to specify that the current function call uses MS_ABI. */
7485 if (mode == VOIDmode)
7486 return GEN_INT (-2);
7488 /* If we've run out of registers, it goes on the stack. */
7489 if (cum->nregs == 0)
7490 return NULL_RTX;
7492 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
7494 /* Only floating point modes are passed in anything but integer regs. */
7495 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
7497 if (named)
7498 regno = cum->regno + FIRST_SSE_REG;
7499 else
7501 rtx t1, t2;
7503 /* Unnamed floating parameters are passed in both the
7504 SSE and integer registers. */
7505 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
7506 t2 = gen_rtx_REG (mode, regno);
7507 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
7508 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
7509 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
7512 /* Handle aggregate types passed in registers. */
7513 if (orig_mode == BLKmode)
7515 if (bytes > 0 && bytes <= 8)
7516 mode = (bytes > 4 ? DImode : SImode);
7517 if (mode == BLKmode)
7518 mode = DImode;
7521 return gen_reg_or_parallel (mode, orig_mode, regno);
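/* E.g. the third integer argument of an MS-ABI call lands in %r8, while a
   named double in that slot goes to %xmm2; an unnamed double is passed in
   both registers via the PARALLEL built above.  */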
7524 /* Return where to put the arguments to a function.
7525 Return zero to push the argument on the stack, or a hard register in which to store the argument.
7527 MODE is the argument's machine mode. TYPE is the data type of the
7528 argument. It is null for libcalls where that information may not be
7529 available. CUM gives information about the preceding args and about
7530 the function being called. NAMED is nonzero if this argument is a
7531 named parameter (otherwise it is an extra parameter matching an
7532 ellipsis). */
7534 static rtx
7535 ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode,
7536 const_tree type, bool named)
7538 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7539 enum machine_mode mode = omode;
7540 HOST_WIDE_INT bytes, words;
7541 rtx arg;
7543 if (mode == BLKmode)
7544 bytes = int_size_in_bytes (type);
7545 else
7546 bytes = GET_MODE_SIZE (mode);
7547 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7549 /* To simplify the code below, represent vector types with a vector mode
7550 even if MMX/SSE are not active. */
7551 if (type && TREE_CODE (type) == VECTOR_TYPE)
7552 mode = type_natural_mode (type, cum, false);
7554 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7555 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
7556 else if (TARGET_64BIT)
7557 arg = function_arg_64 (cum, mode, omode, type, named);
7558 else
7559 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
7561 return arg;
7564 /* A C expression that indicates when an argument must be passed by
7565 reference. If nonzero for an argument, a copy of that argument is
7566 made in memory and a pointer to the argument is passed instead of
7567 the argument itself. The pointer is passed in whatever way is
7568 appropriate for passing a pointer to that type. */
7570 static bool
7571 ix86_pass_by_reference (cumulative_args_t cum_v, enum machine_mode mode,
7572 const_tree type, bool)
7574 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7576 /* See Windows x64 Software Convention. */
7577 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7579 int msize = (int) GET_MODE_SIZE (mode);
7580 if (type)
7582 /* Arrays are passed by reference. */
7583 if (TREE_CODE (type) == ARRAY_TYPE)
7584 return true;
7586 if (AGGREGATE_TYPE_P (type))
7588 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
7589 are passed by reference. */
7590 msize = int_size_in_bytes (type);
7594 /* __m128 is passed by reference. */
7595 switch (msize) {
7596 case 1: case 2: case 4: case 8:
7597 break;
7598 default:
7599 return true;
7602 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
7603 return 1;
7605 return 0;
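/* Under the MS x64 convention this means, e.g., that __m128 values and a
   24-byte struct are passed by reference, while an 8-byte struct is passed
   by value.  */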
7608 /* Return true when TYPE should be 128bit aligned for 32bit argument
7609 passing ABI. XXX: This function is obsolete and is only used for
7610 checking psABI compatibility with previous versions of GCC. */
7612 static bool
7613 ix86_compat_aligned_value_p (const_tree type)
7615 enum machine_mode mode = TYPE_MODE (type);
7616 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
7617 || mode == TDmode
7618 || mode == TFmode
7619 || mode == TCmode)
7620 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
7621 return true;
7622 if (TYPE_ALIGN (type) < 128)
7623 return false;
7625 if (AGGREGATE_TYPE_P (type))
7627 /* Walk the aggregates recursively. */
7628 switch (TREE_CODE (type))
7630 case RECORD_TYPE:
7631 case UNION_TYPE:
7632 case QUAL_UNION_TYPE:
7634 tree field;
7636 /* Walk all the structure fields. */
7637 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7639 if (TREE_CODE (field) == FIELD_DECL
7640 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
7641 return true;
7643 break;
7646 case ARRAY_TYPE:
7647 /* Just in case some languages pass arrays by value. */
7648 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
7649 return true;
7650 break;
7652 default:
7653 gcc_unreachable ();
7656 return false;
7659 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
7660 XXX: This function is obsolete and is only used for checking psABI
7661 compatibility with previous versions of GCC. */
7663 static unsigned int
7664 ix86_compat_function_arg_boundary (enum machine_mode mode,
7665 const_tree type, unsigned int align)
7667 /* In 32bit, only _Decimal128 and __float128 are aligned to their
7668 natural boundaries. */
7669 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
7671 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
7672 make an exception for SSE modes since these require 128bit
7673 alignment.
7675 The handling here differs from field_alignment. ICC aligns MMX
7676 arguments to 4 byte boundaries, while structure fields are aligned
7677 to 8 byte boundaries. */
7678 if (!type)
7680 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
7681 align = PARM_BOUNDARY;
7683 else
7685 if (!ix86_compat_aligned_value_p (type))
7686 align = PARM_BOUNDARY;
7689 if (align > BIGGEST_ALIGNMENT)
7690 align = BIGGEST_ALIGNMENT;
7691 return align;
7694 /* Return true when TYPE should be 128bit aligned for 32bit argument
7695 passing ABI. */
7697 static bool
7698 ix86_contains_aligned_value_p (const_tree type)
7700 enum machine_mode mode = TYPE_MODE (type);
7702 if (mode == XFmode || mode == XCmode)
7703 return false;
7705 if (TYPE_ALIGN (type) < 128)
7706 return false;
7708 if (AGGREGATE_TYPE_P (type))
7710 /* Walk the aggregates recursively. */
7711 switch (TREE_CODE (type))
7713 case RECORD_TYPE:
7714 case UNION_TYPE:
7715 case QUAL_UNION_TYPE:
7717 tree field;
7719 /* Walk all the structure fields. */
7720 for (field = TYPE_FIELDS (type);
7721 field;
7722 field = DECL_CHAIN (field))
7724 if (TREE_CODE (field) == FIELD_DECL
7725 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
7726 return true;
7728 break;
7731 case ARRAY_TYPE:
7732 /* Just in case some languages pass arrays by value. */
7733 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
7734 return true;
7735 break;
7737 default:
7738 gcc_unreachable ();
7741 else
7742 return TYPE_ALIGN (type) >= 128;
7744 return false;
7747 /* Gives the alignment boundary, in bits, of an argument with the
7748 specified mode and type. */
7750 static unsigned int
7751 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
7753 unsigned int align;
7754 if (type)
7756 /* Since the main variant type is used for the call, convert the
7757 type to its main variant. */
7758 type = TYPE_MAIN_VARIANT (type);
7759 align = TYPE_ALIGN (type);
7761 else
7762 align = GET_MODE_ALIGNMENT (mode);
7763 if (align < PARM_BOUNDARY)
7764 align = PARM_BOUNDARY;
7765 else
7767 static bool warned;
7768 unsigned int saved_align = align;
7770 if (!TARGET_64BIT)
7772 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
7773 if (!type)
7775 if (mode == XFmode || mode == XCmode)
7776 align = PARM_BOUNDARY;
7778 else if (!ix86_contains_aligned_value_p (type))
7779 align = PARM_BOUNDARY;
7781 if (align < 128)
7782 align = PARM_BOUNDARY;
7785 if (warn_psabi
7786 && !warned
7787 && align != ix86_compat_function_arg_boundary (mode, type,
7788 saved_align))
7790 warned = true;
7791 inform (input_location,
7792 "The ABI for passing parameters with %d-byte"
7793 " alignment has changed in GCC 4.6",
7794 align / BITS_PER_UNIT);
7798 return align;
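/* E.g. in 32-bit mode a double argument is aligned to PARM_BOUNDARY, while
   an __m128 argument (or an aggregate containing one) keeps its 128-bit
   alignment.  */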
7801 /* Return true if N is a possible register number of function value. */
7803 static bool
7804 ix86_function_value_regno_p (const unsigned int regno)
7806 switch (regno)
7808 case AX_REG:
7809 return true;
7810 case DX_REG:
7811 return (!TARGET_64BIT || ix86_abi != MS_ABI);
7812 case DI_REG:
7813 case SI_REG:
7814 return TARGET_64BIT && ix86_abi != MS_ABI;
7816 /* Complex values are returned in %st(0)/%st(1) pair. */
7817 case ST0_REG:
7818 case ST1_REG:
7819 /* TODO: The function should depend on current function ABI but
7820 builtins.c would need updating then. Therefore we use the
7821 default ABI. */
7822 if (TARGET_64BIT && ix86_abi == MS_ABI)
7823 return false;
7824 return TARGET_FLOAT_RETURNS_IN_80387;
7826 /* Complex values are returned in %xmm0/%xmm1 pair. */
7827 case XMM0_REG:
7828 case XMM1_REG:
7829 return TARGET_SSE;
7831 case MM0_REG:
7832 if (TARGET_MACHO || TARGET_64BIT)
7833 return false;
7834 return TARGET_MMX;
7837 return false;
7840 /* Define how to find the value returned by a function.
7841 VALTYPE is the data type of the value (as a tree).
7842 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7843 otherwise, FUNC is 0. */
7845 static rtx
7846 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
7847 const_tree fntype, const_tree fn)
7849 unsigned int regno;
7851 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7852 we normally prevent this case when mmx is not available. However
7853 some ABIs may require the result to be returned like DImode. */
7854 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7855 regno = FIRST_MMX_REG;
7857 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7858 we prevent this case when sse is not available. However some ABIs
7859 may require the result to be returned like integer TImode. */
7860 else if (mode == TImode
7861 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7862 regno = FIRST_SSE_REG;
7864 /* 32-byte vector modes in %ymm0. */
7865 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
7866 regno = FIRST_SSE_REG;
7868 /* 64-byte vector modes in %zmm0. */
7869 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
7870 regno = FIRST_SSE_REG;
7872 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7873 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
7874 regno = FIRST_FLOAT_REG;
7875 else
7876 /* Most things go in %eax. */
7877 regno = AX_REG;
7879 /* Override FP return register with %xmm0 for local functions when
7880 SSE math is enabled or for functions with sseregparm attribute. */
7881 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
7883 int sse_level = ix86_function_sseregparm (fntype, fn, false);
7884 if ((sse_level >= 1 && mode == SFmode)
7885 || (sse_level == 2 && mode == DFmode))
7886 regno = FIRST_SSE_REG;
7889 /* OImode shouldn't be used directly. */
7890 gcc_assert (mode != OImode);
7892 return gen_rtx_REG (orig_mode, regno);
7895 static rtx
7896 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
7897 const_tree valtype)
7899 rtx ret;
7901 /* Handle libcalls, which don't provide a type node. */
7902 if (valtype == NULL)
7904 unsigned int regno;
7906 switch (mode)
7908 case SFmode:
7909 case SCmode:
7910 case DFmode:
7911 case DCmode:
7912 case TFmode:
7913 case SDmode:
7914 case DDmode:
7915 case TDmode:
7916 regno = FIRST_SSE_REG;
7917 break;
7918 case XFmode:
7919 case XCmode:
7920 regno = FIRST_FLOAT_REG;
7921 break;
7922 case TCmode:
7923 return NULL;
7924 default:
7925 regno = AX_REG;
7928 return gen_rtx_REG (mode, regno);
7930 else if (POINTER_TYPE_P (valtype)
7931 && !upc_shared_type_p (TREE_TYPE (valtype)))
7933 /* Pointers are always returned in word_mode. */
7934 mode = word_mode;
7937 ret = construct_container (mode, orig_mode, valtype, 1,
7938 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
7939 x86_64_int_return_registers, 0);
7941 /* For zero sized structures, construct_container returns NULL, but we
7942 need to keep the rest of the compiler happy by returning a meaningful value. */
7943 if (!ret)
7944 ret = gen_rtx_REG (orig_mode, AX_REG);
7946 return ret;
7949 static rtx
7950 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode,
7951 const_tree valtype)
7953 unsigned int regno = AX_REG;
7955 if (TARGET_SSE)
7957 switch (GET_MODE_SIZE (mode))
7959 case 16:
7960 if (valtype != NULL_TREE
7961 && !VECTOR_INTEGER_TYPE_P (valtype)
7963 && !INTEGRAL_TYPE_P (valtype)
7964 && !VECTOR_FLOAT_TYPE_P (valtype))
7965 break;
7966 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7967 && !COMPLEX_MODE_P (mode))
7968 regno = FIRST_SSE_REG;
7969 break;
7970 case 8:
7971 case 4:
7972 if (mode == SFmode || mode == DFmode)
7973 regno = FIRST_SSE_REG;
7974 break;
7975 default:
7976 break;
7979 return gen_rtx_REG (orig_mode, regno);
7982 static rtx
7983 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
7984 enum machine_mode orig_mode, enum machine_mode mode)
7986 const_tree fn, fntype;
7988 fn = NULL_TREE;
7989 if (fntype_or_decl && DECL_P (fntype_or_decl))
7990 fn = fntype_or_decl;
7991 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
7993 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
7994 return function_value_ms_64 (orig_mode, mode, valtype);
7995 else if (TARGET_64BIT)
7996 return function_value_64 (orig_mode, mode, valtype);
7997 else
7998 return function_value_32 (orig_mode, mode, fntype, fn);
8001 static rtx
8002 ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
8004 enum machine_mode mode, orig_mode;
8006 orig_mode = TYPE_MODE (valtype);
8007 mode = type_natural_mode (valtype, NULL, true);
8008 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
8011 /* Pointer function arguments and return values are promoted to
8012 word_mode. */
8014 static enum machine_mode
8015 ix86_promote_function_mode (const_tree type, enum machine_mode mode,
8016 int *punsignedp, const_tree fntype,
8017 int for_return)
8019 if (type != NULL_TREE && POINTER_TYPE_P (type))
8021 if (upc_shared_type_p (TREE_TYPE (type)))
8023 *punsignedp = 1;
8024 return TYPE_MODE (upc_pts_rep_type_node);
8026 *punsignedp = POINTERS_EXTEND_UNSIGNED;
8027 return word_mode;
8029 return default_promote_function_mode (type, mode, punsignedp, fntype,
8030 for_return);
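/* For the x32 ABI, where pointers are 32 bits wide, this promotes pointer
   arguments and return values to word_mode (DImode); UPC pointers to shared
   objects instead keep the mode of their representation type.  */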
8033 /* Return true if a structure, union or array with MODE containing FIELD
8034 should be accessed using BLKmode. */
8036 static bool
8037 ix86_member_type_forces_blk (const_tree field, enum machine_mode mode)
8039 /* Union with XFmode must be in BLKmode. */
8040 return (mode == XFmode
8041 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
8042 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
8045 rtx
8046 ix86_libcall_value (enum machine_mode mode)
8048 return ix86_function_value_1 (NULL, NULL, mode, mode);
8051 /* Return true iff type is returned in memory. */
8053 static bool
8054 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
8056 #ifdef SUBTARGET_RETURN_IN_MEMORY
8057 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
8058 #else
8059 const enum machine_mode mode = type_natural_mode (type, NULL, true);
8060 HOST_WIDE_INT size;
8062 if (TARGET_64BIT)
8064 if (ix86_function_type_abi (fntype) == MS_ABI)
8066 size = int_size_in_bytes (type);
8068 /* __m128 is returned in xmm0. */
8069 if ((!type || VECTOR_INTEGER_TYPE_P (type)
8070 || INTEGRAL_TYPE_P (type)
8071 || VECTOR_FLOAT_TYPE_P (type))
8072 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8073 && !COMPLEX_MODE_P (mode)
8074 && (GET_MODE_SIZE (mode) == 16 || size == 16))
8075 return false;
8077 /* Otherwise, the size must be exactly 1, 2, 4, or 8 bytes. */
8078 return size != 1 && size != 2 && size != 4 && size != 8;
8080 else
8082 int needed_intregs, needed_sseregs;
8084 return examine_argument (mode, type, 1,
8085 &needed_intregs, &needed_sseregs);
8088 else
8090 if (mode == BLKmode)
8091 return true;
8093 size = int_size_in_bytes (type);
8095 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
8096 return false;
8098 if (VECTOR_MODE_P (mode) || mode == TImode)
8100 /* User-created vectors small enough to fit in EAX. */
8101 if (size < 8)
8102 return false;
8104 /* Unless the ABI prescribes otherwise,
8105 MMX/3dNow values are returned in MM0 if available. */
8107 if (size == 8)
8108 return TARGET_VECT8_RETURNS || !TARGET_MMX;
8110 /* SSE values are returned in XMM0 if available. */
8111 if (size == 16)
8112 return !TARGET_SSE;
8114 /* AVX values are returned in YMM0 if available. */
8115 if (size == 32)
8116 return !TARGET_AVX;
8118 /* AVX512F values are returned in ZMM0 if available. */
8119 if (size == 64)
8120 return !TARGET_AVX512F;
8123 if (mode == XFmode)
8124 return false;
8126 if (size > 12)
8127 return true;
8129 /* OImode shouldn't be used directly. */
8130 gcc_assert (mode != OImode);
8132 return false;
8134 #endif
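/* Summarizing the 32-bit cases above: BLKmode aggregates are returned in
   memory; 8-byte vectors use %mm0 only when MMX is enabled and the target
   does not prefer memory (TARGET_VECT8_RETURNS); 16-, 32- and 64-byte
   vectors use %xmm0, %ymm0 and %zmm0 only when SSE, AVX and AVX512F,
   respectively, are enabled.  */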
8138 /* Create the va_list data type. */
8140 /* Returns the calling-convention-specific va_list data type.
8141 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
8143 static tree
8144 ix86_build_builtin_va_list_abi (enum calling_abi abi)
8146 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
8148 /* For i386 we use a plain pointer to the argument area. */
8149 if (!TARGET_64BIT || abi == MS_ABI)
8150 return build_pointer_type (char_type_node);
8152 record = lang_hooks.types.make_type (RECORD_TYPE);
8153 type_decl = build_decl (BUILTINS_LOCATION,
8154 TYPE_DECL, get_identifier ("__va_list_tag"), record);
8156 f_gpr = build_decl (BUILTINS_LOCATION,
8157 FIELD_DECL, get_identifier ("gp_offset"),
8158 unsigned_type_node);
8159 f_fpr = build_decl (BUILTINS_LOCATION,
8160 FIELD_DECL, get_identifier ("fp_offset"),
8161 unsigned_type_node);
8162 f_ovf = build_decl (BUILTINS_LOCATION,
8163 FIELD_DECL, get_identifier ("overflow_arg_area"),
8164 ptr_type_node);
8165 f_sav = build_decl (BUILTINS_LOCATION,
8166 FIELD_DECL, get_identifier ("reg_save_area"),
8167 ptr_type_node);
8169 va_list_gpr_counter_field = f_gpr;
8170 va_list_fpr_counter_field = f_fpr;
8172 DECL_FIELD_CONTEXT (f_gpr) = record;
8173 DECL_FIELD_CONTEXT (f_fpr) = record;
8174 DECL_FIELD_CONTEXT (f_ovf) = record;
8175 DECL_FIELD_CONTEXT (f_sav) = record;
8177 TYPE_STUB_DECL (record) = type_decl;
8178 TYPE_NAME (record) = type_decl;
8179 TYPE_FIELDS (record) = f_gpr;
8180 DECL_CHAIN (f_gpr) = f_fpr;
8181 DECL_CHAIN (f_fpr) = f_ovf;
8182 DECL_CHAIN (f_ovf) = f_sav;
8184 layout_type (record);
8186 /* The correct type is an array type of one element. */
8187 return build_array_type (record, build_index_type (size_zero_node));
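/* The 64-bit SysV record built above is equivalent to the familiar
     typedef struct __va_list_tag {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __builtin_va_list[1];  */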
8190 /* Set up the builtin va_list data type and, for 64-bit, the additional
8191 calling-convention-specific va_list data types. */
8193 static tree
8194 ix86_build_builtin_va_list (void)
8196 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
8198 /* Initialize abi specific va_list builtin types. */
8199 if (TARGET_64BIT)
8201 tree t;
8202 if (ix86_abi == MS_ABI)
8204 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
8205 if (TREE_CODE (t) != RECORD_TYPE)
8206 t = build_variant_type_copy (t);
8207 sysv_va_list_type_node = t;
8209 else
8211 t = ret;
8212 if (TREE_CODE (t) != RECORD_TYPE)
8213 t = build_variant_type_copy (t);
8214 sysv_va_list_type_node = t;
8216 if (ix86_abi != MS_ABI)
8218 t = ix86_build_builtin_va_list_abi (MS_ABI);
8219 if (TREE_CODE (t) != RECORD_TYPE)
8220 t = build_variant_type_copy (t);
8221 ms_va_list_type_node = t;
8223 else
8225 t = ret;
8226 if (TREE_CODE (t) != RECORD_TYPE)
8227 t = build_variant_type_copy (t);
8228 ms_va_list_type_node = t;
8232 return ret;
8235 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
8237 static void
8238 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
8240 rtx save_area, mem;
8241 alias_set_type set;
8242 int i, max;
8244 /* GPR size of varargs save area. */
8245 if (cfun->va_list_gpr_size)
8246 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
8247 else
8248 ix86_varargs_gpr_size = 0;
8250 /* FPR size of varargs save area. We don't need it if we don't pass
8251 anything in SSE registers. */
8252 if (TARGET_SSE && cfun->va_list_fpr_size)
8253 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
8254 else
8255 ix86_varargs_fpr_size = 0;
8257 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
8258 return;
8260 save_area = frame_pointer_rtx;
8261 set = get_varargs_alias_set ();
8263 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8264 if (max > X86_64_REGPARM_MAX)
8265 max = X86_64_REGPARM_MAX;
8267 for (i = cum->regno; i < max; i++)
8269 mem = gen_rtx_MEM (word_mode,
8270 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
8271 MEM_NOTRAP_P (mem) = 1;
8272 set_mem_alias_set (mem, set);
8273 emit_move_insn (mem,
8274 gen_rtx_REG (word_mode,
8275 x86_64_int_parameter_registers[i]));
8278 if (ix86_varargs_fpr_size)
8280 enum machine_mode smode;
8281 rtx_code_label *label;
8282 rtx test;
8284 /* Now emit code to save SSE registers. The AX parameter contains number
8285 of SSE parameter registers used to call this function, though all we
8286 actually check here is the zero/non-zero status. */
8288 label = gen_label_rtx ();
8289 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
8290 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
8291 label));
8293 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
8294 we used movdqa (i.e. TImode) instead? Perhaps even better would
8295 be if we could determine the real mode of the data, via a hook
8296 into pass_stdarg. Ignore all that for now. */
8297 smode = V4SFmode;
8298 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
8299 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
8301 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
8302 if (max > X86_64_SSE_REGPARM_MAX)
8303 max = X86_64_SSE_REGPARM_MAX;
8305 for (i = cum->sse_regno; i < max; ++i)
8307 mem = plus_constant (Pmode, save_area,
8308 i * 16 + ix86_varargs_gpr_size);
8309 mem = gen_rtx_MEM (smode, mem);
8310 MEM_NOTRAP_P (mem) = 1;
8311 set_mem_alias_set (mem, set);
8312 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
8314 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
8317 emit_label (label);
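/* The save area laid out above holds X86_64_REGPARM_MAX (6) words for the
   general-purpose registers followed by X86_64_SSE_REGPARM_MAX (8) 16-byte
   slots for the SSE registers; the gp_offset/fp_offset values stored by
   ix86_va_start below index into this block.  */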
8321 static void
8322 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
8324 alias_set_type set = get_varargs_alias_set ();
8325 int i;
8327 /* Reset to zero, as there might be a SYSV va_arg used
8328 before. */
8329 ix86_varargs_gpr_size = 0;
8330 ix86_varargs_fpr_size = 0;
8332 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
8334 rtx reg, mem;
8336 mem = gen_rtx_MEM (Pmode,
8337 plus_constant (Pmode, virtual_incoming_args_rtx,
8338 i * UNITS_PER_WORD));
8339 MEM_NOTRAP_P (mem) = 1;
8340 set_mem_alias_set (mem, set);
8342 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
8343 emit_move_insn (mem, reg);
8347 static void
8348 ix86_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
8349 tree type, int *, int no_rtl)
8351 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8352 CUMULATIVE_ARGS next_cum;
8353 tree fntype;
8355 /* This argument doesn't appear to be used anymore. Which is good,
8356 because the old code here didn't suppress rtl generation. */
8357 gcc_assert (!no_rtl);
8359 if (!TARGET_64BIT)
8360 return;
8362 fntype = TREE_TYPE (current_function_decl);
8364 /* For varargs, we do not want to skip the dummy va_dcl argument.
8365 For stdargs, we do want to skip the last named argument. */
8366 next_cum = *cum;
8367 if (stdarg_p (fntype))
8368 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8369 true);
8371 if (cum->call_abi == MS_ABI)
8372 setup_incoming_varargs_ms_64 (&next_cum);
8373 else
8374 setup_incoming_varargs_64 (&next_cum);
8377 /* Check whether TYPE is a va_list of the plain 'char *' kind. */
8379 static bool
8380 is_va_list_char_pointer (tree type)
8382 tree canonic;
8384 /* For 32-bit it is always true. */
8385 if (!TARGET_64BIT)
8386 return true;
8387 canonic = ix86_canonical_va_list_type (type);
8388 return (canonic == ms_va_list_type_node
8389 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
8392 /* Implement va_start. */
8394 static void
8395 ix86_va_start (tree valist, rtx nextarg)
8397 HOST_WIDE_INT words, n_gpr, n_fpr;
8398 tree f_gpr, f_fpr, f_ovf, f_sav;
8399 tree gpr, fpr, ovf, sav, t;
8400 tree type;
8401 rtx ovf_rtx;
8403 if (flag_split_stack
8404 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8406 unsigned int scratch_regno;
8408 /* When we are splitting the stack, we can't refer to the stack
8409 arguments using internal_arg_pointer, because they may be on
8410 the old stack. The split stack prologue will arrange to
8411 leave a pointer to the old stack arguments in a scratch
8412 register, which we here copy to a pseudo-register. The split
8413 stack prologue can't set the pseudo-register directly because
8414 it (the prologue) runs before any registers have been saved. */
8416 scratch_regno = split_stack_prologue_scratch_regno ();
8417 if (scratch_regno != INVALID_REGNUM)
8419 rtx reg;
8420 rtx_insn *seq;
8422 reg = gen_reg_rtx (Pmode);
8423 cfun->machine->split_stack_varargs_pointer = reg;
8425 start_sequence ();
8426 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
8427 seq = get_insns ();
8428 end_sequence ();
8430 push_topmost_sequence ();
8431 emit_insn_after (seq, entry_of_function ());
8432 pop_topmost_sequence ();
8436 /* Only 64bit target needs something special. */
8437 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8439 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8440 std_expand_builtin_va_start (valist, nextarg);
8441 else
8443 rtx va_r, next;
8445 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
8446 next = expand_binop (ptr_mode, add_optab,
8447 cfun->machine->split_stack_varargs_pointer,
8448 crtl->args.arg_offset_rtx,
8449 NULL_RTX, 0, OPTAB_LIB_WIDEN);
8450 convert_move (va_r, next, 0);
8452 return;
8455 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8456 f_fpr = DECL_CHAIN (f_gpr);
8457 f_ovf = DECL_CHAIN (f_fpr);
8458 f_sav = DECL_CHAIN (f_ovf);
8460 valist = build_simple_mem_ref (valist);
8461 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
8462 /* The following should be folded into the MEM_REF offset. */
8463 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
8464 f_gpr, NULL_TREE);
8465 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
8466 f_fpr, NULL_TREE);
8467 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
8468 f_ovf, NULL_TREE);
8469 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
8470 f_sav, NULL_TREE);
8472 /* Count number of gp and fp argument registers used. */
8473 words = crtl->args.info.words;
8474 n_gpr = crtl->args.info.regno;
8475 n_fpr = crtl->args.info.sse_regno;
8477 if (cfun->va_list_gpr_size)
8479 type = TREE_TYPE (gpr);
8480 t = build2 (MODIFY_EXPR, type,
8481 gpr, build_int_cst (type, n_gpr * 8));
8482 TREE_SIDE_EFFECTS (t) = 1;
8483 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8486 if (TARGET_SSE && cfun->va_list_fpr_size)
8488 type = TREE_TYPE (fpr);
8489 t = build2 (MODIFY_EXPR, type, fpr,
8490 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
8491 TREE_SIDE_EFFECTS (t) = 1;
8492 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8495 /* Find the overflow area. */
8496 type = TREE_TYPE (ovf);
8497 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8498 ovf_rtx = crtl->args.internal_arg_pointer;
8499 else
8500 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
8501 t = make_tree (type, ovf_rtx);
8502 if (words != 0)
8503 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
8504 t = build2 (MODIFY_EXPR, type, ovf, t);
8505 TREE_SIDE_EFFECTS (t) = 1;
8506 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8508 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
8510 /* Find the register save area.
8511 The function prologue saves it right above the stack frame. */
8512 type = TREE_TYPE (sav);
8513 t = make_tree (type, frame_pointer_rtx);
8514 if (!ix86_varargs_gpr_size)
8515 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
8516 t = build2 (MODIFY_EXPR, type, sav, t);
8517 TREE_SIDE_EFFECTS (t) = 1;
8518 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
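/* So for a prototype whose named arguments use two integer and one SSE
   register, va_start sets gp_offset = 16 and fp_offset = 64
   (8 * X86_64_REGPARM_MAX + 16), points overflow_arg_area at the first
   stack-passed argument and reg_save_area at the block saved by
   setup_incoming_varargs_64.  */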
8522 /* Implement va_arg. */
8524 static tree
8525 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
8526 gimple_seq *post_p)
8528 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
8529 tree f_gpr, f_fpr, f_ovf, f_sav;
8530 tree gpr, fpr, ovf, sav, t;
8531 int size, rsize;
8532 tree lab_false, lab_over = NULL_TREE;
8533 tree addr, t2;
8534 rtx container;
8535 int indirect_p = 0;
8536 tree ptrtype;
8537 enum machine_mode nat_mode;
8538 unsigned int arg_boundary;
8540 /* Only 64bit target needs something special. */
8541 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8542 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
8544 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8545 f_fpr = DECL_CHAIN (f_gpr);
8546 f_ovf = DECL_CHAIN (f_fpr);
8547 f_sav = DECL_CHAIN (f_ovf);
8549 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
8550 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
8551 valist = build_va_arg_indirect_ref (valist);
8552 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
8553 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
8554 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
8556 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
8557 if (indirect_p)
8558 type = build_pointer_type (type);
8559 size = int_size_in_bytes (type);
8560 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
8562 nat_mode = type_natural_mode (type, NULL, false);
8563 switch (nat_mode)
8565 case V8SFmode:
8566 case V8SImode:
8567 case V32QImode:
8568 case V16HImode:
8569 case V4DFmode:
8570 case V4DImode:
8571 case V16SFmode:
8572 case V16SImode:
8573 case V64QImode:
8574 case V32HImode:
8575 case V8DFmode:
8576 case V8DImode:
8577 /* Unnamed 256-bit and 512-bit vector mode parameters are passed on the stack.  */
8578 if (!TARGET_64BIT_MS_ABI)
8580 container = NULL;
8581 break;
8584 default:
8585 container = construct_container (nat_mode, TYPE_MODE (type),
8586 type, 0, X86_64_REGPARM_MAX,
8587 X86_64_SSE_REGPARM_MAX, intreg,
8589 break;
8592 /* Pull the value out of the saved registers. */
8594 addr = create_tmp_var (ptr_type_node, "addr");
8596 if (container)
8598 int needed_intregs, needed_sseregs;
8599 bool need_temp;
8600 tree int_addr, sse_addr;
8602 lab_false = create_artificial_label (UNKNOWN_LOCATION);
8603 lab_over = create_artificial_label (UNKNOWN_LOCATION);
8605 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
8607 need_temp = (!REG_P (container)
8608 && ((needed_intregs && TYPE_ALIGN (type) > 64)
8609 || TYPE_ALIGN (type) > 128));
8611 /* In case we are passing a structure, verify that it is a consecutive block
8612 on the register save area.  If not, we need to do moves.  */
8613 if (!need_temp && !REG_P (container))
8615 /* Verify that all registers are strictly consecutive */
8616 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
8618 int i;
8620 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8622 rtx slot = XVECEXP (container, 0, i);
8623 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
8624 || INTVAL (XEXP (slot, 1)) != i * 16)
8625 need_temp = 1;
8628 else
8630 int i;
8632 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8634 rtx slot = XVECEXP (container, 0, i);
8635 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
8636 || INTVAL (XEXP (slot, 1)) != i * 8)
8637 need_temp = 1;
8641 if (!need_temp)
8643 int_addr = addr;
8644 sse_addr = addr;
8646 else
8648 int_addr = create_tmp_var (ptr_type_node, "int_addr");
8649 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
8652 /* First ensure that we fit completely in registers. */
8653 if (needed_intregs)
8655 t = build_int_cst (TREE_TYPE (gpr),
8656 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
8657 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
8658 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8659 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8660 gimplify_and_add (t, pre_p);
8662 if (needed_sseregs)
8664 t = build_int_cst (TREE_TYPE (fpr),
8665 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
8666 + X86_64_REGPARM_MAX * 8);
8667 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
8668 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8669 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8670 gimplify_and_add (t, pre_p);
8673 /* Compute index to start of area used for integer regs. */
8674 if (needed_intregs)
8676 /* int_addr = gpr + sav; */
8677 t = fold_build_pointer_plus (sav, gpr);
8678 gimplify_assign (int_addr, t, pre_p);
8680 if (needed_sseregs)
8682 /* sse_addr = fpr + sav; */
8683 t = fold_build_pointer_plus (sav, fpr);
8684 gimplify_assign (sse_addr, t, pre_p);
8686 if (need_temp)
8688 int i, prev_size = 0;
8689 tree temp = create_tmp_var (type, "va_arg_tmp");
8691 /* addr = &temp; */
8692 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
8693 gimplify_assign (addr, t, pre_p);
8695 for (i = 0; i < XVECLEN (container, 0); i++)
8697 rtx slot = XVECEXP (container, 0, i);
8698 rtx reg = XEXP (slot, 0);
8699 enum machine_mode mode = GET_MODE (reg);
8700 tree piece_type;
8701 tree addr_type;
8702 tree daddr_type;
8703 tree src_addr, src;
8704 int src_offset;
8705 tree dest_addr, dest;
8706 int cur_size = GET_MODE_SIZE (mode);
8708 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
8709 prev_size = INTVAL (XEXP (slot, 1));
8710 if (prev_size + cur_size > size)
8712 cur_size = size - prev_size;
8713 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
8714 if (mode == BLKmode)
8715 mode = QImode;
8717 piece_type = lang_hooks.types.type_for_mode (mode, 1);
8718 if (mode == GET_MODE (reg))
8719 addr_type = build_pointer_type (piece_type);
8720 else
8721 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8722 true);
8723 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8724 true);
8726 if (SSE_REGNO_P (REGNO (reg)))
8728 src_addr = sse_addr;
8729 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
8731 else
8733 src_addr = int_addr;
8734 src_offset = REGNO (reg) * 8;
8736 src_addr = fold_convert (addr_type, src_addr);
8737 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
8739 dest_addr = fold_convert (daddr_type, addr);
8740 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
8741 if (cur_size == GET_MODE_SIZE (mode))
8743 src = build_va_arg_indirect_ref (src_addr);
8744 dest = build_va_arg_indirect_ref (dest_addr);
8746 gimplify_assign (dest, src, pre_p);
8748 else
8750 tree copy
8751 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
8752 3, dest_addr, src_addr,
8753 size_int (cur_size));
8754 gimplify_and_add (copy, pre_p);
8756 prev_size += cur_size;
8760 if (needed_intregs)
8762 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
8763 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
8764 gimplify_assign (gpr, t, pre_p);
8767 if (needed_sseregs)
8769 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
8770 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
8771 gimplify_assign (fpr, t, pre_p);
8774 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
8776 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
8779 /* ... otherwise out of the overflow area. */
8781 /* When we align a parameter on the stack for the caller, if the parameter
8782 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
8783 aligned at MAX_SUPPORTED_STACK_ALIGNMENT.  We match the callee
8784 here with the caller.  */
8785 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
8786 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
8787 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
8789 /* Care for on-stack alignment if needed. */
8790 if (arg_boundary <= 64 || size == 0)
8791 t = ovf;
8792 else
8794 HOST_WIDE_INT align = arg_boundary / 8;
8795 t = fold_build_pointer_plus_hwi (ovf, align - 1);
8796 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
8797 build_int_cst (TREE_TYPE (t), -align));
8800 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
8801 gimplify_assign (addr, t, pre_p);
8803 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
8804 gimplify_assign (unshare_expr (ovf), t, pre_p);
8806 if (container)
8807 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
8809 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
8810 addr = fold_convert (ptrtype, addr);
8812 if (indirect_p)
8813 addr = build_va_arg_indirect_ref (addr);
8814 return build_va_arg_indirect_ref (addr);
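/* For illustration (a sketch, not part of the original source): for a scalar
   integer argument the sequence gimplified above behaves roughly like

       if (gp_offset > 48 - 8 * needed_intregs)
         goto overflow;
       addr = reg_save_area + gp_offset;
       gp_offset += 8 * needed_intregs;
       goto done;
     overflow:
       addr = align (overflow_arg_area, arg_boundary);
       overflow_arg_area = addr + rsize * UNITS_PER_WORD;
     done:
       result = *(type *) addr;

   with the fpr/sse_addr path handling SSE-class arguments analogously.  */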
8817 /* Return true if OPNUM's MEM should be matched
8818 in movabs* patterns. */
8820 bool
8821 ix86_check_movabs (rtx insn, int opnum)
8823 rtx set, mem;
8825 set = PATTERN (insn);
8826 if (GET_CODE (set) == PARALLEL)
8827 set = XVECEXP (set, 0, 0);
8828 gcc_assert (GET_CODE (set) == SET);
8829 mem = XEXP (set, opnum);
8830 while (GET_CODE (mem) == SUBREG)
8831 mem = SUBREG_REG (mem);
8832 gcc_assert (MEM_P (mem));
8833 return volatile_ok || !MEM_VOLATILE_P (mem);
8836 /* Initialize the table of extra 80387 mathematical constants. */
8838 static void
8839 init_ext_80387_constants (void)
8841 static const char * cst[5] =
8843 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8844 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8845 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8846 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8847 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8849 int i;
8851 for (i = 0; i < 5; i++)
8853 real_from_string (&ext_80387_constants_table[i], cst[i]);
8854 /* Ensure each constant is rounded to XFmode precision. */
8855 real_convert (&ext_80387_constants_table[i],
8856 XFmode, &ext_80387_constants_table[i]);
8859 ext_80387_constants_init = 1;
8862 /* Return non-zero if the constant is something that
8863 can be loaded with a special instruction. */
8865 int
8866 standard_80387_constant_p (rtx x)
8868 enum machine_mode mode = GET_MODE (x);
8870 REAL_VALUE_TYPE r;
8872 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
8873 return -1;
8875 if (x == CONST0_RTX (mode))
8876 return 1;
8877 if (x == CONST1_RTX (mode))
8878 return 2;
8880 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8882 /* For XFmode constants, try to find a special 80387 instruction when
8883 optimizing for size or on those CPUs that benefit from them. */
8884 if (mode == XFmode
8885 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
8887 int i;
8889 if (! ext_80387_constants_init)
8890 init_ext_80387_constants ();
8892 for (i = 0; i < 5; i++)
8893 if (real_identical (&r, &ext_80387_constants_table[i]))
8894 return i + 3;
8897 /* A load of the constant -0.0 or -1.0 will be split into an
8898 fldz;fchs or fld1;fchs sequence.  */
8899 if (real_isnegzero (&r))
8900 return 8;
8901 if (real_identical (&r, &dconstm1))
8902 return 9;
8904 return 0;
8907 /* Return the opcode of the special instruction to be used to load
8908 the constant X. */
8910 const char *
8911 standard_80387_constant_opcode (rtx x)
8913 switch (standard_80387_constant_p (x))
8915 case 1:
8916 return "fldz";
8917 case 2:
8918 return "fld1";
8919 case 3:
8920 return "fldlg2";
8921 case 4:
8922 return "fldln2";
8923 case 5:
8924 return "fldl2e";
8925 case 6:
8926 return "fldl2t";
8927 case 7:
8928 return "fldpi";
8929 case 8:
8930 case 9:
8931 return "#";
8932 default:
8933 gcc_unreachable ();
8937 /* Return the CONST_DOUBLE representing the 80387 constant that is
8938 loaded by the specified special instruction. The argument IDX
8939 matches the return value from standard_80387_constant_p. */
8941 rtx
8942 standard_80387_constant_rtx (int idx)
8944 int i;
8946 if (! ext_80387_constants_init)
8947 init_ext_80387_constants ();
8949 switch (idx)
8951 case 3:
8952 case 4:
8953 case 5:
8954 case 6:
8955 case 7:
8956 i = idx - 3;
8957 break;
8959 default:
8960 gcc_unreachable ();
8963 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
8964 XFmode);
8967 /* Return 1 if X is all zeros and 2 if X is all ones
8968 in a supported SSE/AVX vector mode.  */
8970 int
8971 standard_sse_constant_p (rtx x)
8973 enum machine_mode mode = GET_MODE (x);
8975 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
8976 return 1;
8977 if (vector_all_ones_operand (x, mode))
8978 switch (mode)
8980 case V16QImode:
8981 case V8HImode:
8982 case V4SImode:
8983 case V2DImode:
8984 if (TARGET_SSE2)
8985 return 2;
8986 case V32QImode:
8987 case V16HImode:
8988 case V8SImode:
8989 case V4DImode:
8990 if (TARGET_AVX2)
8991 return 2;
8992 case V64QImode:
8993 case V32HImode:
8994 case V16SImode:
8995 case V8DImode:
8996 if (TARGET_AVX512F)
8997 return 2;
8998 default:
8999 break;
9002 return 0;
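/* For illustration (a sketch, not part of the original source): examples of
   the classification above -- CONST0_RTX (V4SFmode) yields 1 (loadable with a
   register-clearing xor), an all-ones V2DImode constant yields 2 on
   TARGET_SSE2 (loadable with pcmpeqd), and anything else yields 0.  */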
9005 /* Return the opcode of the special instruction to be used to load
9006 the constant X. */
9008 const char *
9009 standard_sse_constant_opcode (rtx insn, rtx x)
9011 switch (standard_sse_constant_p (x))
9013 case 1:
9014 switch (get_attr_mode (insn))
9016 case MODE_XI:
9017 return "vpxord\t%g0, %g0, %g0";
9018 case MODE_V16SF:
9019 return TARGET_AVX512DQ ? "vxorps\t%g0, %g0, %g0"
9020 : "vpxord\t%g0, %g0, %g0";
9021 case MODE_V8DF:
9022 return TARGET_AVX512DQ ? "vxorpd\t%g0, %g0, %g0"
9023 : "vpxorq\t%g0, %g0, %g0";
9024 case MODE_TI:
9025 return TARGET_AVX512VL ? "vpxord\t%t0, %t0, %t0"
9026 : "%vpxor\t%0, %d0";
9027 case MODE_V2DF:
9028 return "%vxorpd\t%0, %d0";
9029 case MODE_V4SF:
9030 return "%vxorps\t%0, %d0";
9032 case MODE_OI:
9033 return TARGET_AVX512VL ? "vpxord\t%x0, %x0, %x0"
9034 : "vpxor\t%x0, %x0, %x0";
9035 case MODE_V4DF:
9036 return "vxorpd\t%x0, %x0, %x0";
9037 case MODE_V8SF:
9038 return "vxorps\t%x0, %x0, %x0";
9040 default:
9041 break;
9044 case 2:
9045 if (TARGET_AVX512VL
9046 || get_attr_mode (insn) == MODE_XI
9047 || get_attr_mode (insn) == MODE_V8DF
9048 || get_attr_mode (insn) == MODE_V16SF)
9049 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
9050 if (TARGET_AVX)
9051 return "vpcmpeqd\t%0, %0, %0";
9052 else
9053 return "pcmpeqd\t%0, %0";
9055 default:
9056 break;
9058 gcc_unreachable ();
9061 /* Returns true if OP contains a symbol reference */
9063 bool
9064 symbolic_reference_mentioned_p (rtx op)
9066 const char *fmt;
9067 int i;
9069 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
9070 return true;
9072 fmt = GET_RTX_FORMAT (GET_CODE (op));
9073 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
9075 if (fmt[i] == 'E')
9077 int j;
9079 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
9080 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
9081 return true;
9084 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
9085 return true;
9088 return false;
9091 /* Return true if it is appropriate to emit `ret' instructions in the
9092 body of a function. Do this only if the epilogue is simple, needing a
9093 couple of insns. Prior to reloading, we can't tell how many registers
9094 must be saved, so return false then. Return false if there is no frame
9095 marker to de-allocate. */
9097 bool
9098 ix86_can_use_return_insn_p (void)
9100 struct ix86_frame frame;
9102 if (! reload_completed || frame_pointer_needed)
9103 return 0;
9105 /* Don't allow more than 32k pop, since that's all we can do
9106 with one instruction. */
9107 if (crtl->args.pops_args && crtl->args.size >= 32768)
9108 return 0;
9110 ix86_compute_frame_layout (&frame);
9111 return (frame.stack_pointer_offset == UNITS_PER_WORD
9112 && (frame.nregs + frame.nsseregs) == 0);
9115 /* Value should be nonzero if functions must have frame pointers.
9116 Zero means the frame pointer need not be set up (and parms may
9117 be accessed via the stack pointer) in functions that seem suitable. */
9119 static bool
9120 ix86_frame_pointer_required (void)
9122 /* If we accessed previous frames, then the generated code expects
9123 to be able to access the saved ebp value in our frame. */
9124 if (cfun->machine->accesses_prev_frame)
9125 return true;
9127 /* Several x86 OSes need a frame pointer for other reasons,
9128 usually pertaining to setjmp. */
9129 if (SUBTARGET_FRAME_POINTER_REQUIRED)
9130 return true;
9132 /* For older 32-bit runtimes setjmp requires valid frame-pointer. */
9133 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
9134 return true;
9136 /* For Win64 SEH, very large frames need a frame pointer, as the maximum
9137 stack allocation is 4GB.  */
9138 if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
9139 return true;
9141 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
9142 turns off the frame pointer by default. Turn it back on now if
9143 we've not got a leaf function. */
9144 if (TARGET_OMIT_LEAF_FRAME_POINTER
9145 && (!crtl->is_leaf
9146 || ix86_current_function_calls_tls_descriptor))
9147 return true;
9149 if (crtl->profile && !flag_fentry)
9150 return true;
9152 return false;
9155 /* Record that the current function accesses previous call frames. */
9157 void
9158 ix86_setup_frame_addresses (void)
9160 cfun->machine->accesses_prev_frame = 1;
9163 #ifndef USE_HIDDEN_LINKONCE
9164 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
9165 # define USE_HIDDEN_LINKONCE 1
9166 # else
9167 # define USE_HIDDEN_LINKONCE 0
9168 # endif
9169 #endif
9171 static int pic_labels_used;
9173 /* Fills in the label name that should be used for a pc thunk for
9174 the given register. */
9176 static void
9177 get_pc_thunk_name (char name[32], unsigned int regno)
9179 gcc_assert (!TARGET_64BIT);
9181 if (USE_HIDDEN_LINKONCE)
9182 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
9183 else
9184 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
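/* For illustration (not part of the original source): with
   USE_HIDDEN_LINKONCE this produces names such as "__x86.get_pc_thunk.bx"
   for the %ebx thunk; otherwise a local "LPR<regno>"-style internal label
   is generated instead.  */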
9188 /* This function generates code for -fpic that loads %ebx with
9189 the return address of the caller and then returns. */
9191 static void
9192 ix86_code_end (void)
9194 rtx xops[2];
9195 int regno;
9197 for (regno = AX_REG; regno <= SP_REG; regno++)
9199 char name[32];
9200 tree decl;
9202 if (!(pic_labels_used & (1 << regno)))
9203 continue;
9205 get_pc_thunk_name (name, regno);
9207 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
9208 get_identifier (name),
9209 build_function_type_list (void_type_node, NULL_TREE));
9210 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
9211 NULL_TREE, void_type_node);
9212 TREE_PUBLIC (decl) = 1;
9213 TREE_STATIC (decl) = 1;
9214 DECL_IGNORED_P (decl) = 1;
9216 #if TARGET_MACHO
9217 if (TARGET_MACHO)
9219 switch_to_section (darwin_sections[text_coal_section]);
9220 fputs ("\t.weak_definition\t", asm_out_file);
9221 assemble_name (asm_out_file, name);
9222 fputs ("\n\t.private_extern\t", asm_out_file);
9223 assemble_name (asm_out_file, name);
9224 putc ('\n', asm_out_file);
9225 ASM_OUTPUT_LABEL (asm_out_file, name);
9226 DECL_WEAK (decl) = 1;
9228 else
9229 #endif
9230 if (USE_HIDDEN_LINKONCE)
9232 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
9234 targetm.asm_out.unique_section (decl, 0);
9235 switch_to_section (get_named_section (decl, NULL, 0));
9237 targetm.asm_out.globalize_label (asm_out_file, name);
9238 fputs ("\t.hidden\t", asm_out_file);
9239 assemble_name (asm_out_file, name);
9240 putc ('\n', asm_out_file);
9241 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
9243 else
9245 switch_to_section (text_section);
9246 ASM_OUTPUT_LABEL (asm_out_file, name);
9249 DECL_INITIAL (decl) = make_node (BLOCK);
9250 current_function_decl = decl;
9251 init_function_start (decl);
9252 first_function_block_is_cold = false;
9253 /* Make sure unwind info is emitted for the thunk if needed. */
9254 final_start_function (emit_barrier (), asm_out_file, 1);
9256 /* Pad stack IP move with 4 instructions (two NOPs count
9257 as one instruction). */
9258 if (TARGET_PAD_SHORT_FUNCTION)
9260 int i = 8;
9262 while (i--)
9263 fputs ("\tnop\n", asm_out_file);
9266 xops[0] = gen_rtx_REG (Pmode, regno);
9267 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
9268 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
9269 fputs ("\tret\n", asm_out_file);
9270 final_end_function ();
9271 init_insn_lengths ();
9272 free_after_compilation (cfun);
9273 set_cfun (NULL);
9274 current_function_decl = NULL;
9277 if (flag_split_stack)
9278 file_end_indicate_split_stack ();
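/* For illustration (a sketch, not part of the original source): the body
   emitted above for each used thunk is simply

       __x86.get_pc_thunk.bx:
               mov    (%esp), %ebx
               ret

   (AT&T syntax, %ebx case), optionally preceded by NOP padding when
   TARGET_PAD_SHORT_FUNCTION is set.  */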
9281 /* Emit code for the SET_GOT patterns. */
9283 const char *
9284 output_set_got (rtx dest, rtx label)
9286 rtx xops[3];
9288 xops[0] = dest;
9290 if (TARGET_VXWORKS_RTP && flag_pic)
9292 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
9293 xops[2] = gen_rtx_MEM (Pmode,
9294 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
9295 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
9297 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
9298 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
9299 an unadorned address. */
9300 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
9301 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
9302 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
9303 return "";
9306 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
9308 if (!flag_pic)
9310 if (TARGET_MACHO)
9311 /* We don't need a pic base, we're not producing pic. */
9312 gcc_unreachable ();
9314 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
9315 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
9316 targetm.asm_out.internal_label (asm_out_file, "L",
9317 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
9319 else
9321 char name[32];
9322 get_pc_thunk_name (name, REGNO (dest));
9323 pic_labels_used |= 1 << REGNO (dest);
9325 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
9326 xops[2] = gen_rtx_MEM (QImode, xops[2]);
9327 output_asm_insn ("call\t%X2", xops);
9329 #if TARGET_MACHO
9330 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
9331 This is what will be referenced by the Mach-O PIC subsystem. */
9332 if (machopic_should_output_picbase_label () || !label)
9333 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
9335 /* When we are restoring the pic base at the site of a nonlocal label,
9336 and we decided to emit the pic base above, we will still output a
9337 local label used for calculating the correction offset (even though
9338 the offset will be 0 in that case). */
9339 if (label)
9340 targetm.asm_out.internal_label (asm_out_file, "L",
9341 CODE_LABEL_NUMBER (label));
9342 #endif
9345 if (!TARGET_MACHO)
9346 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
9348 return "";
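/* For illustration (a sketch, not part of the original source): in the common
   ELF -fpic case the pattern printed above amounts to

       call   __x86.get_pc_thunk.bx
       addl   $_GLOBAL_OFFSET_TABLE_, %ebx

   which leaves the GOT pointer in the destination register.  */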
9351 /* Generate a "push" pattern for input ARG.  */
9353 static rtx
9354 gen_push (rtx arg)
9356 struct machine_function *m = cfun->machine;
9358 if (m->fs.cfa_reg == stack_pointer_rtx)
9359 m->fs.cfa_offset += UNITS_PER_WORD;
9360 m->fs.sp_offset += UNITS_PER_WORD;
9362 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9363 arg = gen_rtx_REG (word_mode, REGNO (arg));
9365 return gen_rtx_SET (VOIDmode,
9366 gen_rtx_MEM (word_mode,
9367 gen_rtx_PRE_DEC (Pmode,
9368 stack_pointer_rtx)),
9369 arg);
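/* For illustration (not part of the original source): on a 64-bit target
   with ARG = %rbx the RTX built above is

       (set (mem:DI (pre_dec:DI (reg:DI sp))) (reg:DI bx))

   i.e. a plain "pushq %rbx"; gen_pop below builds the matching
   post_inc form.  */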
9372 /* Generate a "pop" pattern for input ARG.  */
9374 static rtx
9375 gen_pop (rtx arg)
9377 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9378 arg = gen_rtx_REG (word_mode, REGNO (arg));
9380 return gen_rtx_SET (VOIDmode,
9381 arg,
9382 gen_rtx_MEM (word_mode,
9383 gen_rtx_POST_INC (Pmode,
9384 stack_pointer_rtx)));
9387 /* Return >= 0 if there is an unused call-clobbered register available
9388 for the entire function. */
9390 static unsigned int
9391 ix86_select_alt_pic_regnum (void)
9393 if (crtl->is_leaf
9394 && !crtl->profile
9395 && !ix86_current_function_calls_tls_descriptor)
9397 int i, drap;
9398 /* Can't use the same register for both PIC and DRAP. */
9399 if (crtl->drap_reg)
9400 drap = REGNO (crtl->drap_reg);
9401 else
9402 drap = -1;
9403 for (i = 2; i >= 0; --i)
9404 if (i != drap && !df_regs_ever_live_p (i))
9405 return i;
9408 return INVALID_REGNUM;
9411 /* Return TRUE if we need to save REGNO. */
9413 static bool
9414 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
9416 if (pic_offset_table_rtx
9417 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
9418 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9419 || crtl->profile
9420 || crtl->calls_eh_return
9421 || crtl->uses_const_pool
9422 || cfun->has_nonlocal_label))
9423 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
9425 if (crtl->calls_eh_return && maybe_eh_return)
9427 unsigned i;
9428 for (i = 0; ; i++)
9430 unsigned test = EH_RETURN_DATA_REGNO (i);
9431 if (test == INVALID_REGNUM)
9432 break;
9433 if (test == regno)
9434 return true;
9438 if (crtl->drap_reg
9439 && regno == REGNO (crtl->drap_reg)
9440 && !cfun->machine->no_drap_save_restore)
9441 return true;
9443 return (df_regs_ever_live_p (regno)
9444 && !call_used_regs[regno]
9445 && !fixed_regs[regno]
9446 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
9449 /* Return the number of saved general purpose registers.  */
9451 static int
9452 ix86_nsaved_regs (void)
9454 int nregs = 0;
9455 int regno;
9457 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9458 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9459 nregs ++;
9460 return nregs;
9463 /* Return the number of saved SSE registers.  */
9465 static int
9466 ix86_nsaved_sseregs (void)
9468 int nregs = 0;
9469 int regno;
9471 if (!TARGET_64BIT_MS_ABI)
9472 return 0;
9473 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9474 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9475 nregs ++;
9476 return nregs;
9479 /* Given FROM and TO register numbers, say whether this elimination is
9480 allowed. If stack alignment is needed, we can only replace argument
9481 pointer with hard frame pointer, or replace frame pointer with stack
9482 pointer. Otherwise, frame pointer elimination is automatically
9483 handled and all other eliminations are valid. */
9485 static bool
9486 ix86_can_eliminate (const int from, const int to)
9488 if (stack_realign_fp)
9489 return ((from == ARG_POINTER_REGNUM
9490 && to == HARD_FRAME_POINTER_REGNUM)
9491 || (from == FRAME_POINTER_REGNUM
9492 && to == STACK_POINTER_REGNUM));
9493 else
9494 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
9497 /* Return the offset between two registers, one to be eliminated, and the other
9498 its replacement, at the start of a routine. */
9500 HOST_WIDE_INT
9501 ix86_initial_elimination_offset (int from, int to)
9503 struct ix86_frame frame;
9504 ix86_compute_frame_layout (&frame);
9506 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9507 return frame.hard_frame_pointer_offset;
9508 else if (from == FRAME_POINTER_REGNUM
9509 && to == HARD_FRAME_POINTER_REGNUM)
9510 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
9511 else
9513 gcc_assert (to == STACK_POINTER_REGNUM);
9515 if (from == ARG_POINTER_REGNUM)
9516 return frame.stack_pointer_offset;
9518 gcc_assert (from == FRAME_POINTER_REGNUM);
9519 return frame.stack_pointer_offset - frame.frame_pointer_offset;
9523 /* In a dynamically-aligned function, we can't know the offset from
9524 stack pointer to frame pointer, so we must ensure that setjmp
9525 eliminates fp against the hard fp (%ebp) rather than trying to
9526 index from %esp up to the top of the frame across a gap that is
9527 of unknown (at compile-time) size. */
9528 static rtx
9529 ix86_builtin_setjmp_frame_value (void)
9531 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
9534 /* When using -fsplit-stack, the allocation routines set a field in
9535 the TCB to the bottom of the stack plus this much space, measured
9536 in bytes. */
9538 #define SPLIT_STACK_AVAILABLE 256
9540 /* Fill structure ix86_frame about frame of currently computed function. */
9542 static void
9543 ix86_compute_frame_layout (struct ix86_frame *frame)
9545 unsigned HOST_WIDE_INT stack_alignment_needed;
9546 HOST_WIDE_INT offset;
9547 unsigned HOST_WIDE_INT preferred_alignment;
9548 HOST_WIDE_INT size = get_frame_size ();
9549 HOST_WIDE_INT to_allocate;
9551 frame->nregs = ix86_nsaved_regs ();
9552 frame->nsseregs = ix86_nsaved_sseregs ();
9554 /* The 64-bit MS ABI seems to require stack alignment to always be 16, except
9555 in function prologues and leaf functions.  */
9556 if ((TARGET_64BIT_MS_ABI && crtl->preferred_stack_boundary < 128)
9557 && (!crtl->is_leaf || cfun->calls_alloca != 0
9558 || ix86_current_function_calls_tls_descriptor))
9560 crtl->preferred_stack_boundary = 128;
9561 crtl->stack_alignment_needed = 128;
9563 /* preferred_stack_boundary is never updated for calls
9564 expanded from a TLS descriptor.  Update it here.  We don't update it in
9565 expand stage because according to the comments before
9566 ix86_current_function_calls_tls_descriptor, tls calls may be optimized
9567 away. */
9568 else if (ix86_current_function_calls_tls_descriptor
9569 && crtl->preferred_stack_boundary < PREFERRED_STACK_BOUNDARY)
9571 crtl->preferred_stack_boundary = PREFERRED_STACK_BOUNDARY;
9572 if (crtl->stack_alignment_needed < PREFERRED_STACK_BOUNDARY)
9573 crtl->stack_alignment_needed = PREFERRED_STACK_BOUNDARY;
9576 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
9577 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
9579 gcc_assert (!size || stack_alignment_needed);
9580 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
9581 gcc_assert (preferred_alignment <= stack_alignment_needed);
9583 /* For SEH we have to limit the amount of code movement into the prologue.
9584 At present we do this via a BLOCKAGE, at which point there's very little
9585 scheduling that can be done, which means that there's very little point
9586 in doing anything except PUSHs. */
9587 if (TARGET_SEH)
9588 cfun->machine->use_fast_prologue_epilogue = false;
9590 /* During reload iterations the number of registers saved can change.
9591 Recompute the value as needed.  Do not recompute when the number of registers
9592 didn't change, as reload makes multiple calls to this function and does not
9593 expect the decision to change within a single iteration.  */
9594 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun))
9595 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
9597 int count = frame->nregs;
9598 struct cgraph_node *node = cgraph_node::get (current_function_decl);
9600 cfun->machine->use_fast_prologue_epilogue_nregs = count;
9602 /* The fast prologue uses move instead of push to save registers. This
9603 is significantly longer, but also executes faster as modern hardware
9604 can execute the moves in parallel, but can't do that for push/pop.
9606 Be careful about choosing which prologue to emit: when the function takes
9607 many instructions to execute, we may use the slow version, as well as when
9608 the function is known to be outside a hot spot (this is known with
9609 feedback only).  Weight the size of the function by the number of registers
9610 to save, as it is cheap to use one or two push instructions but very
9611 slow to use many of them.  */
9612 if (count)
9613 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
9614 if (node->frequency < NODE_FREQUENCY_NORMAL
9615 || (flag_branch_probabilities
9616 && node->frequency < NODE_FREQUENCY_HOT))
9617 cfun->machine->use_fast_prologue_epilogue = false;
9618 else
9619 cfun->machine->use_fast_prologue_epilogue
9620 = !expensive_function_p (count);
9623 frame->save_regs_using_mov
9624 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
9625 /* If static stack checking is enabled and done with probes,
9626 the registers need to be saved before allocating the frame. */
9627 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
9629 /* Skip return address. */
9630 offset = UNITS_PER_WORD;
9632 /* Skip pushed static chain. */
9633 if (ix86_static_chain_on_stack)
9634 offset += UNITS_PER_WORD;
9636 /* Skip saved base pointer. */
9637 if (frame_pointer_needed)
9638 offset += UNITS_PER_WORD;
9639 frame->hfp_save_offset = offset;
9641 /* The traditional frame pointer location is at the top of the frame. */
9642 frame->hard_frame_pointer_offset = offset;
9644 /* Register save area */
9645 offset += frame->nregs * UNITS_PER_WORD;
9646 frame->reg_save_offset = offset;
9648 /* On SEH target, registers are pushed just before the frame pointer
9649 location. */
9650 if (TARGET_SEH)
9651 frame->hard_frame_pointer_offset = offset;
9653 /* Align and set SSE register save area. */
9654 if (frame->nsseregs)
9656 /* The only ABI that has saved SSE registers (Win64) also has a
9657 16-byte aligned default stack, and thus we don't need to be
9658 within the re-aligned local stack frame to save them. */
9659 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
9660 offset = (offset + 16 - 1) & -16;
9661 offset += frame->nsseregs * 16;
9663 frame->sse_reg_save_offset = offset;
9665 /* The re-aligned stack starts here. Values before this point are not
9666 directly comparable with values below this point. In order to make
9667 sure that no value happens to be the same before and after, force
9668 the alignment computation below to add a non-zero value. */
9669 if (stack_realign_fp)
9670 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
9672 /* Va-arg area */
9673 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
9674 offset += frame->va_arg_size;
9676 /* Align start of frame for local function. */
9677 if (stack_realign_fp
9678 || offset != frame->sse_reg_save_offset
9679 || size != 0
9680 || !crtl->is_leaf
9681 || cfun->calls_alloca
9682 || ix86_current_function_calls_tls_descriptor)
9683 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
9685 /* Frame pointer points here. */
9686 frame->frame_pointer_offset = offset;
9688 offset += size;
9690 /* Add outgoing arguments area. Can be skipped if we eliminated
9691 all the function calls as dead code.
9692 Skipping is, however, impossible when the function calls alloca, since
9693 the alloca expander assumes that the last crtl->outgoing_args_size bytes
9694 of the stack frame are unused.  */
9695 if (ACCUMULATE_OUTGOING_ARGS
9696 && (!crtl->is_leaf || cfun->calls_alloca
9697 || ix86_current_function_calls_tls_descriptor))
9699 offset += crtl->outgoing_args_size;
9700 frame->outgoing_arguments_size = crtl->outgoing_args_size;
9702 else
9703 frame->outgoing_arguments_size = 0;
9705 /* Align stack boundary. Only needed if we're calling another function
9706 or using alloca. */
9707 if (!crtl->is_leaf || cfun->calls_alloca
9708 || ix86_current_function_calls_tls_descriptor)
9709 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
9711 /* We've reached end of stack frame. */
9712 frame->stack_pointer_offset = offset;
9714 /* Size prologue needs to allocate. */
9715 to_allocate = offset - frame->sse_reg_save_offset;
9717 if ((!to_allocate && frame->nregs <= 1)
9718 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
9719 frame->save_regs_using_mov = false;
9721 if (ix86_using_red_zone ()
9722 && crtl->sp_is_unchanging
9723 && crtl->is_leaf
9724 && !ix86_current_function_calls_tls_descriptor)
9726 frame->red_zone_size = to_allocate;
9727 if (frame->save_regs_using_mov)
9728 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
9729 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
9730 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
9732 else
9733 frame->red_zone_size = 0;
9734 frame->stack_pointer_offset -= frame->red_zone_size;
9736 /* The SEH frame pointer location is near the bottom of the frame.
9737 This is enforced by the fact that the difference between the
9738 stack pointer and the frame pointer is limited to 240 bytes in
9739 the unwind data structure. */
9740 if (TARGET_SEH)
9742 HOST_WIDE_INT diff;
9744 /* If we can leave the frame pointer where it is, do so. Also, returns
9745 the establisher frame for __builtin_frame_address (0). */
9746 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
9747 if (diff <= SEH_MAX_FRAME_SIZE
9748 && (diff > 240 || (diff & 15) != 0)
9749 && !crtl->accesses_prior_frames)
9751 /* Ideally we'd determine what portion of the local stack frame
9752 (within the constraint of the lowest 240) is most heavily used.
9753 But without that complication, simply bias the frame pointer
9754 by 128 bytes so as to maximize the amount of the local stack
9755 frame that is addressable with 8-bit offsets. */
9756 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
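/* For illustration (a rough sketch, not part of the original source): the
   offsets computed above describe a frame laid out approximately as

       return address                  <- offset UNITS_PER_WORD
       [pushed static chain]
       [saved frame pointer]           <- hfp_save_offset
       GP register save area           <- reg_save_offset
       SSE register save area          <- sse_reg_save_offset (16-byte aligned)
       va_arg register area
       local variables                 <- frame_pointer_offset
       outgoing argument area          <- stack_pointer_offset
       [red zone, 64-bit only]

   with offsets measured downwards from the incoming CFA.  */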
9761 /* This is semi-inlined memory_address_length, but simplified
9762 since we know that we're always dealing with reg+offset, and
9763 to avoid having to create and discard all that rtl. */
9765 static inline int
9766 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
9768 int len = 4;
9770 if (offset == 0)
9772 /* EBP and R13 cannot be encoded without an offset. */
9773 len = (regno == BP_REG || regno == R13_REG);
9775 else if (IN_RANGE (offset, -128, 127))
9776 len = 1;
9778 /* ESP and R12 must be encoded with a SIB byte. */
9779 if (regno == SP_REG || regno == R12_REG)
9780 len++;
9782 return len;
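/* For illustration (not part of the original source): worked examples of the
   length computed above -- (%ebp) needs a disp8, so len = 1; (%esp) needs no
   displacement but does need a SIB byte, so len = 1; 16(%esp) is
   disp8 + SIB, len = 2; and 1024(%ebp) needs a disp32, len = 4.  */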
9785 /* Return an RTX that points to CFA_OFFSET within the stack frame.
9786 The valid base registers are taken from CFUN->MACHINE->FS. */
9788 static rtx
9789 choose_baseaddr (HOST_WIDE_INT cfa_offset)
9791 const struct machine_function *m = cfun->machine;
9792 rtx base_reg = NULL;
9793 HOST_WIDE_INT base_offset = 0;
9795 if (m->use_fast_prologue_epilogue)
9797 /* Choose the base register most likely to allow the most scheduling
9798 opportunities. Generally FP is valid throughout the function,
9799 while DRAP must be reloaded within the epilogue. But choose either
9800 over the SP due to increased encoding size. */
9802 if (m->fs.fp_valid)
9804 base_reg = hard_frame_pointer_rtx;
9805 base_offset = m->fs.fp_offset - cfa_offset;
9807 else if (m->fs.drap_valid)
9809 base_reg = crtl->drap_reg;
9810 base_offset = 0 - cfa_offset;
9812 else if (m->fs.sp_valid)
9814 base_reg = stack_pointer_rtx;
9815 base_offset = m->fs.sp_offset - cfa_offset;
9818 else
9820 HOST_WIDE_INT toffset;
9821 int len = 16, tlen;
9823 /* Choose the base register with the smallest address encoding.
9824 With a tie, choose FP > DRAP > SP. */
9825 if (m->fs.sp_valid)
9827 base_reg = stack_pointer_rtx;
9828 base_offset = m->fs.sp_offset - cfa_offset;
9829 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
9831 if (m->fs.drap_valid)
9833 toffset = 0 - cfa_offset;
9834 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
9835 if (tlen <= len)
9837 base_reg = crtl->drap_reg;
9838 base_offset = toffset;
9839 len = tlen;
9842 if (m->fs.fp_valid)
9844 toffset = m->fs.fp_offset - cfa_offset;
9845 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
9846 if (tlen <= len)
9848 base_reg = hard_frame_pointer_rtx;
9849 base_offset = toffset;
9850 len = tlen;
9854 gcc_assert (base_reg != NULL);
9856 return plus_constant (Pmode, base_reg, base_offset);
9859 /* Emit code to save registers in the prologue. */
9861 static void
9862 ix86_emit_save_regs (void)
9864 unsigned int regno;
9865 rtx insn;
9867 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
9868 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9870 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
9871 RTX_FRAME_RELATED_P (insn) = 1;
9875 /* Emit a single register save at CFA - CFA_OFFSET. */
9877 static void
9878 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
9879 HOST_WIDE_INT cfa_offset)
9881 struct machine_function *m = cfun->machine;
9882 rtx reg = gen_rtx_REG (mode, regno);
9883 rtx mem, addr, base, insn;
9885 addr = choose_baseaddr (cfa_offset);
9886 mem = gen_frame_mem (mode, addr);
9888 /* For SSE saves, we need to indicate the 128-bit alignment. */
9889 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
9891 insn = emit_move_insn (mem, reg);
9892 RTX_FRAME_RELATED_P (insn) = 1;
9894 base = addr;
9895 if (GET_CODE (base) == PLUS)
9896 base = XEXP (base, 0);
9897 gcc_checking_assert (REG_P (base));
9899 /* When saving registers into a re-aligned local stack frame, avoid
9900 any tricky guessing by dwarf2out. */
9901 if (m->fs.realigned)
9903 gcc_checking_assert (stack_realign_drap);
9905 if (regno == REGNO (crtl->drap_reg))
9907 /* A bit of a hack. We force the DRAP register to be saved in
9908 the re-aligned stack frame, which provides us with a copy
9909 of the CFA that will last past the prologue. Install it. */
9910 gcc_checking_assert (cfun->machine->fs.fp_valid);
9911 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
9912 cfun->machine->fs.fp_offset - cfa_offset);
9913 mem = gen_rtx_MEM (mode, addr);
9914 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
9916 else
9918 /* The frame pointer is a stable reference within the
9919 aligned frame. Use it. */
9920 gcc_checking_assert (cfun->machine->fs.fp_valid);
9921 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
9922 cfun->machine->fs.fp_offset - cfa_offset);
9923 mem = gen_rtx_MEM (mode, addr);
9924 add_reg_note (insn, REG_CFA_EXPRESSION,
9925 gen_rtx_SET (VOIDmode, mem, reg));
9929 /* The memory may not be relative to the current CFA register,
9930 which means that we may need to generate a new pattern for
9931 use by the unwind info. */
9932 else if (base != m->fs.cfa_reg)
9934 addr = plus_constant (Pmode, m->fs.cfa_reg,
9935 m->fs.cfa_offset - cfa_offset);
9936 mem = gen_rtx_MEM (mode, addr);
9937 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
9941 /* Emit code to save registers using MOV insns.
9942 First register is stored at CFA - CFA_OFFSET. */
9943 static void
9944 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
9946 unsigned int regno;
9948 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9949 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9951 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
9952 cfa_offset -= UNITS_PER_WORD;
9956 /* Emit code to save SSE registers using MOV insns.
9957 First register is stored at CFA - CFA_OFFSET. */
9958 static void
9959 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
9961 unsigned int regno;
9963 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9964 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9966 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
9967 cfa_offset -= 16;
9971 static GTY(()) rtx queued_cfa_restores;
9973 /* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
9974 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
9975 Don't add the note if the previously saved value will be left untouched
9976 within stack red-zone till return, as unwinders can find the same value
9977 in the register and on the stack. */
9979 static void
9980 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
9982 if (!crtl->shrink_wrapped
9983 && cfa_offset <= cfun->machine->fs.red_zone_offset)
9984 return;
9986 if (insn)
9988 add_reg_note (insn, REG_CFA_RESTORE, reg);
9989 RTX_FRAME_RELATED_P (insn) = 1;
9991 else
9992 queued_cfa_restores
9993 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
9996 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
9998 static void
9999 ix86_add_queued_cfa_restore_notes (rtx insn)
10001 rtx last;
10002 if (!queued_cfa_restores)
10003 return;
10004 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
10006 XEXP (last, 1) = REG_NOTES (insn);
10007 REG_NOTES (insn) = queued_cfa_restores;
10008 queued_cfa_restores = NULL_RTX;
10009 RTX_FRAME_RELATED_P (insn) = 1;
10012 /* Expand prologue or epilogue stack adjustment.
10013 The pattern exists to put a dependency on all ebp-based memory accesses.
10014 STYLE should be negative if instructions should be marked as frame related,
10015 zero if %r11 register is live and cannot be freely used and positive
10016 otherwise. */
10018 static void
10019 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
10020 int style, bool set_cfa)
10022 struct machine_function *m = cfun->machine;
10023 rtx insn;
10024 bool add_frame_related_expr = false;
10026 if (Pmode == SImode)
10027 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
10028 else if (x86_64_immediate_operand (offset, DImode))
10029 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
10030 else
10032 rtx tmp;
10033 /* r11 is used by indirect sibcall return as well, set before the
10034 epilogue and used after the epilogue. */
10035 if (style)
10036 tmp = gen_rtx_REG (DImode, R11_REG);
10037 else
10039 gcc_assert (src != hard_frame_pointer_rtx
10040 && dest != hard_frame_pointer_rtx);
10041 tmp = hard_frame_pointer_rtx;
10043 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
10044 if (style < 0)
10045 add_frame_related_expr = true;
10047 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
10050 insn = emit_insn (insn);
10051 if (style >= 0)
10052 ix86_add_queued_cfa_restore_notes (insn);
10054 if (set_cfa)
10056 rtx r;
10058 gcc_assert (m->fs.cfa_reg == src);
10059 m->fs.cfa_offset += INTVAL (offset);
10060 m->fs.cfa_reg = dest;
10062 r = gen_rtx_PLUS (Pmode, src, offset);
10063 r = gen_rtx_SET (VOIDmode, dest, r);
10064 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
10065 RTX_FRAME_RELATED_P (insn) = 1;
10067 else if (style < 0)
10069 RTX_FRAME_RELATED_P (insn) = 1;
10070 if (add_frame_related_expr)
10072 rtx r = gen_rtx_PLUS (Pmode, src, offset);
10073 r = gen_rtx_SET (VOIDmode, dest, r);
10074 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
10078 if (dest == stack_pointer_rtx)
10080 HOST_WIDE_INT ooffset = m->fs.sp_offset;
10081 bool valid = m->fs.sp_valid;
10083 if (src == hard_frame_pointer_rtx)
10085 valid = m->fs.fp_valid;
10086 ooffset = m->fs.fp_offset;
10088 else if (src == crtl->drap_reg)
10090 valid = m->fs.drap_valid;
10091 ooffset = 0;
10093 else
10095 /* Else there are two possibilities: SP itself, which we set
10096 up as the default above, or EH_RETURN_STACKADJ_RTX, which is
10097 taken care of by hand along the eh_return path.  */
10098 gcc_checking_assert (src == stack_pointer_rtx
10099 || offset == const0_rtx);
10102 m->fs.sp_offset = ooffset - INTVAL (offset);
10103 m->fs.sp_valid = valid;
10107 /* Find an available register to be used as dynamic realign argument
10108 pointer register.  Such a register will be written in the prologue and
10109 used at the beginning of the body, so it must not be
10110 1. parameter passing register.
10111 2. GOT pointer.
10112 We reuse static-chain register if it is available. Otherwise, we
10113 use DI for i386 and R13 for x86-64. We chose R13 since it has
10114 shorter encoding.
10116 Return: the regno of chosen register. */
10118 static unsigned int
10119 find_drap_reg (void)
10121 tree decl = cfun->decl;
10123 if (TARGET_64BIT)
10125 /* Use R13 for a nested function or a function that needs a static chain.
10126 Since a function with a tail call may use any caller-saved
10127 register in the epilogue, DRAP must not use a caller-saved
10128 register in that case.  */
10129 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10130 return R13_REG;
10132 return R10_REG;
10134 else
10136 /* Use DI for a nested function or a function that needs a static chain.
10137 Since a function with a tail call may use any caller-saved
10138 register in the epilogue, DRAP must not use a caller-saved
10139 register in that case.  */
10140 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10141 return DI_REG;
10143 /* Reuse static chain register if it isn't used for parameter
10144 passing. */
10145 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
10147 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
10148 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
10149 return CX_REG;
10151 return DI_REG;
10155 /* Return minimum incoming stack alignment. */
10157 static unsigned int
10158 ix86_minimum_incoming_stack_boundary (bool sibcall)
10160 unsigned int incoming_stack_boundary;
10162 /* Prefer the one specified at command line. */
10163 if (ix86_user_incoming_stack_boundary)
10164 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
10165 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
10166 if -mstackrealign is used, this isn't a sibcall check, and the
10167 estimated stack alignment is 128 bits.  */
10168 else if (!sibcall
10169 && !TARGET_64BIT
10170 && ix86_force_align_arg_pointer
10171 && crtl->stack_alignment_estimated == 128)
10172 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10173 else
10174 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
10176 /* Incoming stack alignment can be changed on individual functions
10177 via force_align_arg_pointer attribute. We use the smallest
10178 incoming stack boundary. */
10179 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
10180 && lookup_attribute (ix86_force_align_arg_pointer_string,
10181 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
10182 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10184 /* The incoming stack frame has to be aligned at least at
10185 parm_stack_boundary. */
10186 if (incoming_stack_boundary < crtl->parm_stack_boundary)
10187 incoming_stack_boundary = crtl->parm_stack_boundary;
10189 /* The stack at the entry of main is aligned by the runtime.  We use the
10190 smallest incoming stack boundary.  */
10191 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
10192 && DECL_NAME (current_function_decl)
10193 && MAIN_NAME_P (DECL_NAME (current_function_decl))
10194 && DECL_FILE_SCOPE_P (current_function_decl))
10195 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
10197 return incoming_stack_boundary;
10200 /* Update incoming stack boundary and estimated stack alignment. */
10202 static void
10203 ix86_update_stack_boundary (void)
10205 ix86_incoming_stack_boundary
10206 = ix86_minimum_incoming_stack_boundary (false);
10208 /* x86_64 vararg needs 16byte stack alignment for register save
10209 area. */
10210 if (TARGET_64BIT
10211 && cfun->stdarg
10212 && crtl->stack_alignment_estimated < 128)
10213 crtl->stack_alignment_estimated = 128;
10216 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
10217 needed or an rtx for DRAP otherwise. */
10219 static rtx
10220 ix86_get_drap_rtx (void)
10222 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
10223 crtl->need_drap = true;
10225 if (stack_realign_drap)
10227 /* Assign DRAP to vDRAP and return vDRAP.  */
10228 unsigned int regno = find_drap_reg ();
10229 rtx drap_vreg;
10230 rtx arg_ptr;
10231 rtx_insn *seq, *insn;
10233 arg_ptr = gen_rtx_REG (Pmode, regno);
10234 crtl->drap_reg = arg_ptr;
10236 start_sequence ();
10237 drap_vreg = copy_to_reg (arg_ptr);
10238 seq = get_insns ();
10239 end_sequence ();
10241 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
10242 if (!optimize)
10244 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
10245 RTX_FRAME_RELATED_P (insn) = 1;
10247 return drap_vreg;
10249 else
10250 return NULL;
10253 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
10255 static rtx
10256 ix86_internal_arg_pointer (void)
10258 return virtual_incoming_args_rtx;
10261 struct scratch_reg {
10262 rtx reg;
10263 bool saved;
10266 /* Return a short-lived scratch register for use on function entry.
10267 In 32-bit mode, it is valid only after the registers are saved
10268 in the prologue. This register must be released by means of
10269 release_scratch_register_on_entry once it is dead. */
10271 static void
10272 get_scratch_register_on_entry (struct scratch_reg *sr)
10274 int regno;
10276 sr->saved = false;
10278 if (TARGET_64BIT)
10280 /* We always use R11 in 64-bit mode. */
10281 regno = R11_REG;
10283 else
10285 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
10286 bool fastcall_p
10287 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10288 bool thiscall_p
10289 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10290 bool static_chain_p = DECL_STATIC_CHAIN (decl);
10291 int regparm = ix86_function_regparm (fntype, decl);
10292 int drap_regno
10293 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
10295 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
10296 for the static chain register. */
10297 if ((regparm < 1 || (fastcall_p && !static_chain_p))
10298 && drap_regno != AX_REG)
10299 regno = AX_REG;
10300 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
10301 for the static chain register. */
10302 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
10303 regno = AX_REG;
10304 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
10305 regno = DX_REG;
10306 /* ecx is the static chain register. */
10307 else if (regparm < 3 && !fastcall_p && !thiscall_p
10308 && !static_chain_p
10309 && drap_regno != CX_REG)
10310 regno = CX_REG;
10311 else if (ix86_save_reg (BX_REG, true))
10312 regno = BX_REG;
10313 /* esi is the static chain register. */
10314 else if (!(regparm == 3 && static_chain_p)
10315 && ix86_save_reg (SI_REG, true))
10316 regno = SI_REG;
10317 else if (ix86_save_reg (DI_REG, true))
10318 regno = DI_REG;
10319 else
10321 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
10322 sr->saved = true;
10326 sr->reg = gen_rtx_REG (Pmode, regno);
10327 if (sr->saved)
10329 rtx insn = emit_insn (gen_push (sr->reg));
10330 RTX_FRAME_RELATED_P (insn) = 1;
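/* For illustration (a sketch, not part of the original source): the intended
   usage, as in ix86_adjust_stack_and_probe below, is

       struct scratch_reg sr;
       get_scratch_register_on_entry (&sr);
       ... emit code that clobbers sr.reg ...
       release_scratch_register_on_entry (&sr);

   so that a push/pop pair is only generated when no free call-clobbered
   register could be found.  */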
10334 /* Release a scratch register obtained from the preceding function. */
10336 static void
10337 release_scratch_register_on_entry (struct scratch_reg *sr)
10339 if (sr->saved)
10341 struct machine_function *m = cfun->machine;
10342 rtx x, insn = emit_insn (gen_pop (sr->reg));
10344 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
10345 RTX_FRAME_RELATED_P (insn) = 1;
10346 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
10347 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10348 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
10349 m->fs.sp_offset -= UNITS_PER_WORD;
10353 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
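/* For illustration (not part of the original source): with the usual
   STACK_CHECK_PROBE_INTERVAL_EXP of 12 this probes once per 4096-byte
   page.  */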
10355 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
10357 static void
10358 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
10360 /* We skip the probe for the first interval + a small dope of 4 words and
10361 probe that many bytes past the specified size to maintain a protection
10362 area at the bottom of the stack.  */
10363 const int dope = 4 * UNITS_PER_WORD;
10364 rtx size_rtx = GEN_INT (size), last;
10366 /* See if we have a constant small number of probes to generate. If so,
10367 that's the easy case. The run-time loop is made up of 11 insns in the
10368 generic case while the compile-time loop is made up of 3+2*(n-1) insns
10369 for n # of intervals. */
10370 if (size <= 5 * PROBE_INTERVAL)
10372 HOST_WIDE_INT i, adjust;
10373 bool first_probe = true;
10375 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
10376 values of N from 1 until it exceeds SIZE. If only one probe is
10377 needed, this will not generate any code. Then adjust and probe
10378 to PROBE_INTERVAL + SIZE. */
10379 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10381 if (first_probe)
10383 adjust = 2 * PROBE_INTERVAL + dope;
10384 first_probe = false;
10386 else
10387 adjust = PROBE_INTERVAL;
10389 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10390 plus_constant (Pmode, stack_pointer_rtx,
10391 -adjust)));
10392 emit_stack_probe (stack_pointer_rtx);
10395 if (first_probe)
10396 adjust = size + PROBE_INTERVAL + dope;
10397 else
10398 adjust = size + PROBE_INTERVAL - i;
10400 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10401 plus_constant (Pmode, stack_pointer_rtx,
10402 -adjust)));
10403 emit_stack_probe (stack_pointer_rtx);
10405 /* Adjust back to account for the additional first interval. */
10406 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10407 plus_constant (Pmode, stack_pointer_rtx,
10408 PROBE_INTERVAL + dope)));
10411 /* Otherwise, do the same as above, but in a loop. Note that we must be
10412 extra careful with variables wrapping around because we might be at
10413 the very top (or the very bottom) of the address space and we have
10414 to be able to handle this case properly; in particular, we use an
10415 equality test for the loop condition. */
10416 else
10418 HOST_WIDE_INT rounded_size;
10419 struct scratch_reg sr;
10421 get_scratch_register_on_entry (&sr);
10424 /* Step 1: round SIZE to the previous multiple of the interval. */
10426 rounded_size = size & -PROBE_INTERVAL;
10429 /* Step 2: compute initial and final value of the loop counter. */
10431 /* SP = SP_0 + PROBE_INTERVAL. */
10432 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10433 plus_constant (Pmode, stack_pointer_rtx,
10434 - (PROBE_INTERVAL + dope))));
10436 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
10437 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
10438 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
10439 gen_rtx_PLUS (Pmode, sr.reg,
10440 stack_pointer_rtx)));
10443 /* Step 3: the loop
10445 while (SP != LAST_ADDR)
10447 SP = SP + PROBE_INTERVAL
10448 probe at SP
10451 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
10452 values of N from 1 until it is equal to ROUNDED_SIZE. */
10454 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
10457 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
10458 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
10460 if (size != rounded_size)
10462 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10463 plus_constant (Pmode, stack_pointer_rtx,
10464 rounded_size - size)));
10465 emit_stack_probe (stack_pointer_rtx);
10468 /* Adjust back to account for the additional first interval. */
10469 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10470 plus_constant (Pmode, stack_pointer_rtx,
10471 PROBE_INTERVAL + dope)));
10473 release_scratch_register_on_entry (&sr);
10476 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
10478 /* Even if the stack pointer isn't the CFA register, we need to correctly
10479 describe the adjustments made to it, in particular differentiate the
10480 frame-related ones from the frame-unrelated ones. */
10481 if (size > 0)
10483 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
10484 XVECEXP (expr, 0, 0)
10485 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10486 plus_constant (Pmode, stack_pointer_rtx, -size));
10487 XVECEXP (expr, 0, 1)
10488 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10489 plus_constant (Pmode, stack_pointer_rtx,
10490 PROBE_INTERVAL + dope + size));
10491 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
10492 RTX_FRAME_RELATED_P (last) = 1;
10494 cfun->machine->fs.sp_offset += size;
10497 /* Make sure nothing is scheduled before we are done. */
10498 emit_insn (gen_blockage ());
10501 /* Adjust the stack pointer up to REG while probing it. */
10503 const char *
10504 output_adjust_stack_and_probe (rtx reg)
10506 static int labelno = 0;
10507 char loop_lab[32], end_lab[32];
10508 rtx xops[2];
10510 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10511 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10513 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10515 /* Jump to END_LAB if SP == LAST_ADDR. */
10516 xops[0] = stack_pointer_rtx;
10517 xops[1] = reg;
10518 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10519 fputs ("\tje\t", asm_out_file);
10520 assemble_name_raw (asm_out_file, end_lab);
10521 fputc ('\n', asm_out_file);
10523 /* SP = SP + PROBE_INTERVAL. */
10524 xops[1] = GEN_INT (PROBE_INTERVAL);
10525 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10527 /* Probe at SP. */
10528 xops[1] = const0_rtx;
10529 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
10531 fprintf (asm_out_file, "\tjmp\t");
10532 assemble_name_raw (asm_out_file, loop_lab);
10533 fputc ('\n', asm_out_file);
10535 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10537 return "";
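/* For illustration only -- assuming a 4096-byte PROBE_INTERVAL and %rax as
   the scratch register holding LAST_ADDR -- the templates above emit a
   64-bit AT&T loop roughly like:

	.LPSRL0:
		cmpq	%rax, %rsp	# done once SP reaches LAST_ADDR
		je	.LPSRE0
		subq	$4096, %rsp	# SP -= PROBE_INTERVAL
		orq	$0, (%rsp)	# touch the newly exposed page
		jmp	.LPSRL0
	.LPSRE0:
   */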
10540 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
10541 inclusive. These are offsets from the current stack pointer. */
10543 static void
10544 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
10546 /* See if we have a constant small number of probes to generate. If so,
10547 that's the easy case. The run-time loop is made up of 7 insns in the
10548 generic case while the compile-time loop is made up of n insns for n #
10549 of intervals. */
10550 if (size <= 7 * PROBE_INTERVAL)
10552 HOST_WIDE_INT i;
10554 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
10555 it exceeds SIZE. If only one probe is needed, this will not
10556 generate any code. Then probe at FIRST + SIZE. */
10557 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10558 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10559 -(first + i)));
10561 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10562 -(first + size)));
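/* Worked example with illustrative values: FIRST == 8192, SIZE == 12000
   and a 4096-byte interval yield probes at sp - 12288, sp - 16384 and,
   from the final statement above, sp - 20192.  */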
10565 /* Otherwise, do the same as above, but in a loop. Note that we must be
10566 extra careful with variables wrapping around because we might be at
10567 the very top (or the very bottom) of the address space and we have
10568 to be able to handle this case properly; in particular, we use an
10569 equality test for the loop condition. */
10570 else
10572 HOST_WIDE_INT rounded_size, last;
10573 struct scratch_reg sr;
10575 get_scratch_register_on_entry (&sr);
10578 /* Step 1: round SIZE to the previous multiple of the interval. */
10580 rounded_size = size & -PROBE_INTERVAL;
10583 /* Step 2: compute initial and final value of the loop counter. */
10585 /* TEST_OFFSET = FIRST. */
10586 emit_move_insn (sr.reg, GEN_INT (-first));
10588 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
10589 last = first + rounded_size;
10592 /* Step 3: the loop
10594 while (TEST_ADDR != LAST_ADDR)
10596 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
10597 probe at TEST_ADDR
10600 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
10601 until it is equal to ROUNDED_SIZE. */
10603 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
10606 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
10607 that SIZE is equal to ROUNDED_SIZE. */
10609 if (size != rounded_size)
10610 emit_stack_probe (plus_constant (Pmode,
10611 gen_rtx_PLUS (Pmode,
10612 stack_pointer_rtx,
10613 sr.reg),
10614 rounded_size - size));
10616 release_scratch_register_on_entry (&sr);
10619 /* Make sure nothing is scheduled before we are done. */
10620 emit_insn (gen_blockage ());
10623 /* Probe a range of stack addresses from REG to END, inclusive. These are
10624 offsets from the current stack pointer. */
10626 const char *
10627 output_probe_stack_range (rtx reg, rtx end)
10629 static int labelno = 0;
10630 char loop_lab[32], end_lab[32];
10631 rtx xops[3];
10633 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10634 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10636 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10638 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
10639 xops[0] = reg;
10640 xops[1] = end;
10641 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10642 fputs ("\tje\t", asm_out_file);
10643 assemble_name_raw (asm_out_file, end_lab);
10644 fputc ('\n', asm_out_file);
10646 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
10647 xops[1] = GEN_INT (PROBE_INTERVAL);
10648 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10650 /* Probe at TEST_ADDR. */
10651 xops[0] = stack_pointer_rtx;
10652 xops[1] = reg;
10653 xops[2] = const0_rtx;
10654 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
10656 fprintf (asm_out_file, "\tjmp\t");
10657 assemble_name_raw (asm_out_file, loop_lab);
10658 fputc ('\n', asm_out_file);
10660 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10662 return "";
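/* Illustrative 64-bit AT&T output, assuming %rax as TEST_ADDR and %rdx as
   LAST_ADDR (both hold negative offsets from the stack pointer); unlike
   output_adjust_stack_and_probe, the stack pointer itself never moves:

	.LPSRL1:
		cmpq	%rdx, %rax
		je	.LPSRE1
		subq	$4096, %rax	# advance TEST_ADDR by one interval
		orq	$0, (%rsp,%rax)	# probe at SP + TEST_ADDR
		jmp	.LPSRL1
	.LPSRE1:
   */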
10665 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
10666 to be generated in correct form. */
10667 static void
10668 ix86_finalize_stack_realign_flags (void)
10670 /* Check if stack realignment is really needed after reload, and
10671 store the result in cfun. */
10672 unsigned int incoming_stack_boundary
10673 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
10674 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
10675 unsigned int stack_realign = (incoming_stack_boundary
10676 < (crtl->is_leaf
10677 ? crtl->max_used_stack_slot_alignment
10678 : crtl->stack_alignment_needed));
10680 if (crtl->stack_realign_finalized)
10682 /* After stack_realign_needed is finalized, we can no longer
10683 change it. */
10684 gcc_assert (crtl->stack_realign_needed == stack_realign);
10685 return;
10688 /* If the only reason for frame_pointer_needed is that we conservatively
10689 assumed stack realignment might be needed, but in the end nothing that
10690 needed the stack alignment had been spilled, clear frame_pointer_needed
10691 and say we don't need stack realignment. */
10692 if (stack_realign
10693 && frame_pointer_needed
10694 && crtl->is_leaf
10695 && flag_omit_frame_pointer
10696 && crtl->sp_is_unchanging
10697 && !ix86_current_function_calls_tls_descriptor
10698 && !crtl->accesses_prior_frames
10699 && !cfun->calls_alloca
10700 && !crtl->calls_eh_return
10701 && !(flag_stack_check && STACK_CHECK_MOVING_SP)
10702 && !ix86_frame_pointer_required ()
10703 && get_frame_size () == 0
10704 && ix86_nsaved_sseregs () == 0
10705 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
10707 HARD_REG_SET set_up_by_prologue, prologue_used;
10708 basic_block bb;
10710 CLEAR_HARD_REG_SET (prologue_used);
10711 CLEAR_HARD_REG_SET (set_up_by_prologue);
10712 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
10713 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
10714 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
10715 HARD_FRAME_POINTER_REGNUM);
10716 FOR_EACH_BB_FN (bb, cfun)
10718 rtx_insn *insn;
10719 FOR_BB_INSNS (bb, insn)
10720 if (NONDEBUG_INSN_P (insn)
10721 && requires_stack_frame_p (insn, prologue_used,
10722 set_up_by_prologue))
10724 crtl->stack_realign_needed = stack_realign;
10725 crtl->stack_realign_finalized = true;
10726 return;
10730 /* If drap has been set, but it actually isn't live at the start
10731 of the function, there is no reason to set it up. */
10732 if (crtl->drap_reg)
10734 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
10735 if (! REGNO_REG_SET_P (DF_LR_IN (bb), REGNO (crtl->drap_reg)))
10737 crtl->drap_reg = NULL_RTX;
10738 crtl->need_drap = false;
10741 else
10742 cfun->machine->no_drap_save_restore = true;
10744 frame_pointer_needed = false;
10745 stack_realign = false;
10746 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
10747 crtl->stack_alignment_needed = incoming_stack_boundary;
10748 crtl->stack_alignment_estimated = incoming_stack_boundary;
10749 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
10750 crtl->preferred_stack_boundary = incoming_stack_boundary;
10751 df_finish_pass (true);
10752 df_scan_alloc (NULL);
10753 df_scan_blocks ();
10754 df_compute_regs_ever_live (true);
10755 df_analyze ();
10758 crtl->stack_realign_needed = stack_realign;
10759 crtl->stack_realign_finalized = true;
10762 /* Expand the prologue into a bunch of separate insns. */
10764 void
10765 ix86_expand_prologue (void)
10767 struct machine_function *m = cfun->machine;
10768 rtx insn, t;
10769 bool pic_reg_used;
10770 struct ix86_frame frame;
10771 HOST_WIDE_INT allocate;
10772 bool int_registers_saved;
10773 bool sse_registers_saved;
10775 ix86_finalize_stack_realign_flags ();
10777 /* DRAP should not coexist with stack_realign_fp */
10778 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
10780 memset (&m->fs, 0, sizeof (m->fs));
10782 /* Initialize CFA state for before the prologue. */
10783 m->fs.cfa_reg = stack_pointer_rtx;
10784 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
10786 /* Track SP offset to the CFA. We continue tracking this after we've
10787 swapped the CFA register away from SP. In the case of re-alignment
10788 this is fudged; we're interested in offsets within the local frame. */
10789 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10790 m->fs.sp_valid = true;
10792 ix86_compute_frame_layout (&frame);
10794 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
10796 /* We should have already generated an error for any use of
10797 ms_hook on a nested function. */
10798 gcc_checking_assert (!ix86_static_chain_on_stack);
10800 /* Check if profiling is active and we shall use the profiling-before-prologue
10801 variant. If so, sorry. */
10802 if (crtl->profile && flag_fentry != 0)
10803 sorry ("ms_hook_prologue attribute isn%'t compatible "
10804 "with -mfentry for 32-bit");
10806 /* In ix86_asm_output_function_label we emitted:
10807 8b ff movl.s %edi,%edi
10808 55 push %ebp
10809 8b ec movl.s %esp,%ebp
10811 This matches the hookable function prologue in Win32 API
10812 functions in Microsoft Windows XP Service Pack 2 and newer.
10813 Wine uses this to enable Windows apps to hook the Win32 API
10814 functions provided by Wine.
10816 What that means is that we've already set up the frame pointer. */
10818 if (frame_pointer_needed
10819 && !(crtl->drap_reg && crtl->stack_realign_needed))
10821 rtx push, mov;
10823 /* We've decided to use the frame pointer already set up.
10824 Describe this to the unwinder by pretending that both
10825 push and mov insns happen right here.
10827 Putting the unwind info here at the end of the ms_hook
10828 is done so that we can make absolutely certain we get
10829 the required byte sequence at the start of the function,
10830 rather than relying on an assembler that can produce
10831 the exact encoding required.
10833 However it does mean (in the unpatched case) that we have
10834 a 1 insn window where the asynchronous unwind info is
10835 incorrect. However, if we placed the unwind info at
10836 its correct location we would have incorrect unwind info
10837 in the patched case. Which is probably all moot since
10838 I don't expect Wine generates dwarf2 unwind info for the
10839 system libraries that use this feature. */
10841 insn = emit_insn (gen_blockage ());
10843 push = gen_push (hard_frame_pointer_rtx);
10844 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
10845 stack_pointer_rtx);
10846 RTX_FRAME_RELATED_P (push) = 1;
10847 RTX_FRAME_RELATED_P (mov) = 1;
10849 RTX_FRAME_RELATED_P (insn) = 1;
10850 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10851 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
10853 /* Note that gen_push incremented m->fs.cfa_offset, even
10854 though we didn't emit the push insn here. */
10855 m->fs.cfa_reg = hard_frame_pointer_rtx;
10856 m->fs.fp_offset = m->fs.cfa_offset;
10857 m->fs.fp_valid = true;
10859 else
10861 /* The frame pointer is not needed so pop %ebp again.
10862 This leaves us with a pristine state. */
10863 emit_insn (gen_pop (hard_frame_pointer_rtx));
10867 /* The first insn of a function that accepts its static chain on the
10868 stack is to push the register that would be filled in by a direct
10869 call. This insn will be skipped by the trampoline. */
10870 else if (ix86_static_chain_on_stack)
10872 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
10873 emit_insn (gen_blockage ());
10875 /* We don't want to interpret this push insn as a register save,
10876 only as a stack adjustment. The real copy of the register as
10877 a save will be done later, if needed. */
10878 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
10879 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
10880 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
10881 RTX_FRAME_RELATED_P (insn) = 1;
10884 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
10885 DRAP is needed and stack realignment is really needed after reload. */
10886 if (stack_realign_drap)
10888 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10890 /* Only need to push parameter pointer reg if it is caller saved. */
10891 if (!call_used_regs[REGNO (crtl->drap_reg)])
10893 /* Push arg pointer reg */
10894 insn = emit_insn (gen_push (crtl->drap_reg));
10895 RTX_FRAME_RELATED_P (insn) = 1;
10898 /* Grab the argument pointer. */
10899 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
10900 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10901 RTX_FRAME_RELATED_P (insn) = 1;
10902 m->fs.cfa_reg = crtl->drap_reg;
10903 m->fs.cfa_offset = 0;
10905 /* Align the stack. */
10906 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10907 stack_pointer_rtx,
10908 GEN_INT (-align_bytes)));
10909 RTX_FRAME_RELATED_P (insn) = 1;
10911 /* Replicate the return address on the stack so that return
10912 address can be reached via (argp - 1) slot. This is needed
10913 to implement macro RETURN_ADDR_RTX and intrinsic function
10914 expand_builtin_return_addr etc. */
10915 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
10916 t = gen_frame_mem (word_mode, t);
10917 insn = emit_insn (gen_push (t));
10918 RTX_FRAME_RELATED_P (insn) = 1;
10920 /* For the purposes of frame and register save area addressing,
10921 we've started over with a new frame. */
10922 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10923 m->fs.realigned = true;
10926 int_registers_saved = (frame.nregs == 0);
10927 sse_registers_saved = (frame.nsseregs == 0);
10929 if (frame_pointer_needed && !m->fs.fp_valid)
10931 /* Note: AT&T enter does NOT have reversed args. Enter is probably
10932 slower on all targets. Also sdb doesn't like it. */
10933 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
10934 RTX_FRAME_RELATED_P (insn) = 1;
10936 /* Push registers now, before setting the frame pointer
10937 on SEH target. */
10938 if (!int_registers_saved
10939 && TARGET_SEH
10940 && !frame.save_regs_using_mov)
10942 ix86_emit_save_regs ();
10943 int_registers_saved = true;
10944 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10947 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
10949 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
10950 RTX_FRAME_RELATED_P (insn) = 1;
10952 if (m->fs.cfa_reg == stack_pointer_rtx)
10953 m->fs.cfa_reg = hard_frame_pointer_rtx;
10954 m->fs.fp_offset = m->fs.sp_offset;
10955 m->fs.fp_valid = true;
10959 if (!int_registers_saved)
10961 /* If saving registers via PUSH, do so now. */
10962 if (!frame.save_regs_using_mov)
10964 ix86_emit_save_regs ();
10965 int_registers_saved = true;
10966 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10969 /* When using the red zone we may start register saving before allocating
10970 the stack frame, saving one cycle of the prologue. However, avoid
10971 doing this if we have to probe the stack; at least on x86_64 the
10972 stack probe can turn into a call that clobbers a red zone location. */
10973 else if (ix86_using_red_zone ()
10974 && (! TARGET_STACK_PROBE
10975 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
10977 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10978 int_registers_saved = true;
10982 if (stack_realign_fp)
10984 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10985 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
10987 /* The computation of the size of the re-aligned stack frame means
10988 that we must allocate the size of the register save area before
10989 performing the actual alignment. Otherwise we cannot guarantee
10990 that there's enough storage above the realignment point. */
10991 if (m->fs.sp_offset != frame.sse_reg_save_offset)
10992 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10993 GEN_INT (m->fs.sp_offset
10994 - frame.sse_reg_save_offset),
10995 -1, false);
10997 /* Align the stack. */
10998 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10999 stack_pointer_rtx,
11000 GEN_INT (-align_bytes)));
11002 /* For the purposes of register save area addressing, the stack
11003 pointer is no longer valid. As for the value of sp_offset,
11004 see ix86_compute_frame_layout, which we need to match in order
11005 to pass verification of stack_pointer_offset at the end. */
11006 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
11007 m->fs.sp_valid = false;
11010 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
11012 if (flag_stack_usage_info)
11014 /* We start to count from ARG_POINTER. */
11015 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
11017 /* If it was realigned, take into account the fake frame. */
11018 if (stack_realign_drap)
11020 if (ix86_static_chain_on_stack)
11021 stack_size += UNITS_PER_WORD;
11023 if (!call_used_regs[REGNO (crtl->drap_reg)])
11024 stack_size += UNITS_PER_WORD;
11026 /* This over-estimates by 1 minimal-stack-alignment-unit but
11027 mitigates that by counting in the new return address slot. */
11028 current_function_dynamic_stack_size
11029 += crtl->stack_alignment_needed / BITS_PER_UNIT;
11032 current_function_static_stack_size = stack_size;
11035 /* On SEH target with very large frame size, allocate an area to save
11036 SSE registers (as the very large allocation won't be described). */
11037 if (TARGET_SEH
11038 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
11039 && !sse_registers_saved)
11041 HOST_WIDE_INT sse_size =
11042 frame.sse_reg_save_offset - frame.reg_save_offset;
11044 gcc_assert (int_registers_saved);
11046 /* No need to do stack checking as the area will be immediately
11047 written. */
11048 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11049 GEN_INT (-sse_size), -1,
11050 m->fs.cfa_reg == stack_pointer_rtx);
11051 allocate -= sse_size;
11052 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11053 sse_registers_saved = true;
11056 /* The stack has already been decremented by the instruction calling us
11057 so probe if the size is non-negative to preserve the protection area. */
11058 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
11060 /* We expect the registers to be saved when probes are used. */
11061 gcc_assert (int_registers_saved);
11063 if (STACK_CHECK_MOVING_SP)
11065 if (!(crtl->is_leaf && !cfun->calls_alloca
11066 && allocate <= PROBE_INTERVAL))
11068 ix86_adjust_stack_and_probe (allocate);
11069 allocate = 0;
11072 else
11074 HOST_WIDE_INT size = allocate;
11076 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
11077 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
11079 if (TARGET_STACK_PROBE)
11081 if (crtl->is_leaf && !cfun->calls_alloca)
11083 if (size > PROBE_INTERVAL)
11084 ix86_emit_probe_stack_range (0, size);
11086 else
11087 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
11089 else
11091 if (crtl->is_leaf && !cfun->calls_alloca)
11093 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
11094 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT,
11095 size - STACK_CHECK_PROTECT);
11097 else
11098 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
11103 if (allocate == 0)
11105 else if (!ix86_target_stack_probe ()
11106 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
11108 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11109 GEN_INT (-allocate), -1,
11110 m->fs.cfa_reg == stack_pointer_rtx);
11112 else
11114 rtx eax = gen_rtx_REG (Pmode, AX_REG);
11115 rtx r10 = NULL;
11116 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
11117 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
11118 bool eax_live = ix86_eax_live_at_start_p ();
11119 bool r10_live = false;
11121 if (TARGET_64BIT)
11122 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
11124 if (eax_live)
11126 insn = emit_insn (gen_push (eax));
11127 allocate -= UNITS_PER_WORD;
11128 /* Note that SEH directives need to continue tracking the stack
11129 pointer even after the frame pointer has been set up. */
11130 if (sp_is_cfa_reg || TARGET_SEH)
11132 if (sp_is_cfa_reg)
11133 m->fs.cfa_offset += UNITS_PER_WORD;
11134 RTX_FRAME_RELATED_P (insn) = 1;
11138 if (r10_live)
11140 r10 = gen_rtx_REG (Pmode, R10_REG);
11141 insn = emit_insn (gen_push (r10));
11142 allocate -= UNITS_PER_WORD;
11143 if (sp_is_cfa_reg || TARGET_SEH)
11145 if (sp_is_cfa_reg)
11146 m->fs.cfa_offset += UNITS_PER_WORD;
11147 RTX_FRAME_RELATED_P (insn) = 1;
11151 emit_move_insn (eax, GEN_INT (allocate));
11152 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
11154 /* Use the fact that AX still contains ALLOCATE. */
11155 adjust_stack_insn = (Pmode == DImode
11156 ? gen_pro_epilogue_adjust_stack_di_sub
11157 : gen_pro_epilogue_adjust_stack_si_sub);
11159 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
11160 stack_pointer_rtx, eax));
11162 if (sp_is_cfa_reg || TARGET_SEH)
11164 if (sp_is_cfa_reg)
11165 m->fs.cfa_offset += allocate;
11166 RTX_FRAME_RELATED_P (insn) = 1;
11167 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11168 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11169 plus_constant (Pmode, stack_pointer_rtx,
11170 -allocate)));
11172 m->fs.sp_offset += allocate;
11174 /* Use stack_pointer_rtx for relative addressing so that code
11175 works for realigned stack, too. */
11176 if (r10_live && eax_live)
11178 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11179 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
11180 gen_frame_mem (word_mode, t));
11181 t = plus_constant (Pmode, t, UNITS_PER_WORD);
11182 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
11183 gen_frame_mem (word_mode, t));
11185 else if (eax_live || r10_live)
11187 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11188 emit_move_insn (gen_rtx_REG (word_mode,
11189 (eax_live ? AX_REG : R10_REG)),
11190 gen_frame_mem (word_mode, t));
11193 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
11195 /* If we haven't already set up the frame pointer, do so now. */
11196 if (frame_pointer_needed && !m->fs.fp_valid)
11198 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
11199 GEN_INT (frame.stack_pointer_offset
11200 - frame.hard_frame_pointer_offset));
11201 insn = emit_insn (insn);
11202 RTX_FRAME_RELATED_P (insn) = 1;
11203 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
11205 if (m->fs.cfa_reg == stack_pointer_rtx)
11206 m->fs.cfa_reg = hard_frame_pointer_rtx;
11207 m->fs.fp_offset = frame.hard_frame_pointer_offset;
11208 m->fs.fp_valid = true;
11211 if (!int_registers_saved)
11212 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11213 if (!sse_registers_saved)
11214 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11216 pic_reg_used = false;
11217 /* We don't use pic-register for pe-coff target. */
11218 if (pic_offset_table_rtx
11219 && !TARGET_PECOFF
11220 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
11221 || crtl->profile))
11223 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
11225 if (alt_pic_reg_used != INVALID_REGNUM)
11226 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
11228 pic_reg_used = true;
11231 if (pic_reg_used)
11233 if (TARGET_64BIT)
11235 if (ix86_cmodel == CM_LARGE_PIC)
11237 rtx_code_label *label;
11238 rtx tmp_reg;
11240 gcc_assert (Pmode == DImode);
11241 label = gen_label_rtx ();
11242 emit_label (label);
11243 LABEL_PRESERVE_P (label) = 1;
11244 tmp_reg = gen_rtx_REG (Pmode, R11_REG);
11245 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
11246 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
11247 label));
11248 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
11249 insn = emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
11250 pic_offset_table_rtx, tmp_reg));
11252 else
11253 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
11255 else
11257 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
11258 RTX_FRAME_RELATED_P (insn) = 1;
11259 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
11263 /* In the pic_reg_used case, make sure that the got load isn't deleted
11264 when mcount needs it. Blockage to avoid call movement across mcount
11265 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
11266 note. */
11267 if (crtl->profile && !flag_fentry && pic_reg_used)
11268 emit_insn (gen_prologue_use (pic_offset_table_rtx));
11270 if (crtl->drap_reg && !crtl->stack_realign_needed)
11272 /* vDRAP is set up, but after reload it turns out stack realignment
11273 isn't necessary; here we emit prologue code to set up DRAP
11274 without the stack realignment adjustment. */
11275 t = choose_baseaddr (0);
11276 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
11279 /* Prevent instructions from being scheduled into register save push
11280 sequence when access to the redzone area is done through frame pointer.
11281 The offset between the frame pointer and the stack pointer is calculated
11282 relative to the value of the stack pointer at the end of the function
11283 prologue, and moving instructions that access redzone area via frame
11284 pointer inside push sequence violates this assumption. */
11285 if (frame_pointer_needed && frame.red_zone_size)
11286 emit_insn (gen_memory_blockage ());
11288 /* Emit cld instruction if stringops are used in the function. */
11289 if (TARGET_CLD && ix86_current_function_needs_cld)
11290 emit_insn (gen_cld ());
11292 /* SEH requires that the prologue end within 256 bytes of the start of
11293 the function. Prevent instruction schedules that would extend that.
11294 Further, prevent alloca modifications to the stack pointer from being
11295 combined with prologue modifications. */
11296 if (TARGET_SEH)
11297 emit_insn (gen_prologue_use (stack_pointer_rtx));
11300 /* Emit code to restore REG using a POP insn. */
11302 static void
11303 ix86_emit_restore_reg_using_pop (rtx reg)
11305 struct machine_function *m = cfun->machine;
11306 rtx insn = emit_insn (gen_pop (reg));
11308 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
11309 m->fs.sp_offset -= UNITS_PER_WORD;
11311 if (m->fs.cfa_reg == crtl->drap_reg
11312 && REGNO (reg) == REGNO (crtl->drap_reg))
11314 /* Previously we'd represented the CFA as an expression
11315 like *(%ebp - 8). We've just popped that value from
11316 the stack, which means we need to reset the CFA to
11317 the drap register. This will remain until we restore
11318 the stack pointer. */
11319 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11320 RTX_FRAME_RELATED_P (insn) = 1;
11322 /* This means that the DRAP register is valid for addressing too. */
11323 m->fs.drap_valid = true;
11324 return;
11327 if (m->fs.cfa_reg == stack_pointer_rtx)
11329 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11330 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
11331 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11332 RTX_FRAME_RELATED_P (insn) = 1;
11334 m->fs.cfa_offset -= UNITS_PER_WORD;
11337 /* When the frame pointer is the CFA, and we pop it, we are
11338 swapping back to the stack pointer as the CFA. This happens
11339 for stack frames that don't allocate other data, so we assume
11340 the stack pointer is now pointing at the return address, i.e.
11341 the function entry state, which makes the offset be 1 word. */
11342 if (reg == hard_frame_pointer_rtx)
11344 m->fs.fp_valid = false;
11345 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11347 m->fs.cfa_reg = stack_pointer_rtx;
11348 m->fs.cfa_offset -= UNITS_PER_WORD;
11350 add_reg_note (insn, REG_CFA_DEF_CFA,
11351 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11352 GEN_INT (m->fs.cfa_offset)));
11353 RTX_FRAME_RELATED_P (insn) = 1;
11358 /* Emit code to restore saved registers using POP insns. */
11360 static void
11361 ix86_emit_restore_regs_using_pop (void)
11363 unsigned int regno;
11365 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11366 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
11367 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
11370 /* Emit code and notes for the LEAVE instruction. */
11372 static void
11373 ix86_emit_leave (void)
11375 struct machine_function *m = cfun->machine;
11376 rtx insn = emit_insn (ix86_gen_leave ());
11378 ix86_add_queued_cfa_restore_notes (insn);
11380 gcc_assert (m->fs.fp_valid);
11381 m->fs.sp_valid = true;
11382 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
11383 m->fs.fp_valid = false;
11385 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11387 m->fs.cfa_reg = stack_pointer_rtx;
11388 m->fs.cfa_offset = m->fs.sp_offset;
11390 add_reg_note (insn, REG_CFA_DEF_CFA,
11391 plus_constant (Pmode, stack_pointer_rtx,
11392 m->fs.sp_offset));
11393 RTX_FRAME_RELATED_P (insn) = 1;
11395 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
11396 m->fs.fp_offset);
11399 /* Emit code to restore saved registers using MOV insns.
11400 First register is restored from CFA - CFA_OFFSET. */
11401 static void
11402 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
11403 bool maybe_eh_return)
11405 struct machine_function *m = cfun->machine;
11406 unsigned int regno;
11408 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11409 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11411 rtx reg = gen_rtx_REG (word_mode, regno);
11412 rtx insn, mem;
11414 mem = choose_baseaddr (cfa_offset);
11415 mem = gen_frame_mem (word_mode, mem);
11416 insn = emit_move_insn (reg, mem);
11418 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
11420 /* Previously we'd represented the CFA as an expression
11421 like *(%ebp - 8). We've just popped that value from
11422 the stack, which means we need to reset the CFA to
11423 the drap register. This will remain until we restore
11424 the stack pointer. */
11425 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11426 RTX_FRAME_RELATED_P (insn) = 1;
11428 /* This means that the DRAP register is valid for addressing. */
11429 m->fs.drap_valid = true;
11431 else
11432 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11434 cfa_offset -= UNITS_PER_WORD;
11438 /* Emit code to restore saved registers using MOV insns.
11439 First register is restored from CFA - CFA_OFFSET. */
11440 static void
11441 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
11442 bool maybe_eh_return)
11444 unsigned int regno;
11446 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11447 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11449 rtx reg = gen_rtx_REG (V4SFmode, regno);
11450 rtx mem;
11452 mem = choose_baseaddr (cfa_offset);
11453 mem = gen_rtx_MEM (V4SFmode, mem);
11454 set_mem_align (mem, 128);
11455 emit_move_insn (reg, mem);
11457 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11459 cfa_offset -= 16;
11463 /* Restore function stack, frame, and registers. */
11465 void
11466 ix86_expand_epilogue (int style)
11468 struct machine_function *m = cfun->machine;
11469 struct machine_frame_state frame_state_save = m->fs;
11470 struct ix86_frame frame;
11471 bool restore_regs_via_mov;
11472 bool using_drap;
11474 ix86_finalize_stack_realign_flags ();
11475 ix86_compute_frame_layout (&frame);
11477 m->fs.sp_valid = (!frame_pointer_needed
11478 || (crtl->sp_is_unchanging
11479 && !stack_realign_fp));
11480 gcc_assert (!m->fs.sp_valid
11481 || m->fs.sp_offset == frame.stack_pointer_offset);
11483 /* The FP must be valid if the frame pointer is present. */
11484 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
11485 gcc_assert (!m->fs.fp_valid
11486 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
11488 /* We must have *some* valid pointer to the stack frame. */
11489 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
11491 /* The DRAP is never valid at this point. */
11492 gcc_assert (!m->fs.drap_valid);
11494 /* See the comment about red zone and frame
11495 pointer usage in ix86_expand_prologue. */
11496 if (frame_pointer_needed && frame.red_zone_size)
11497 emit_insn (gen_memory_blockage ());
11499 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
11500 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
11502 /* Determine the CFA offset of the end of the red-zone. */
11503 m->fs.red_zone_offset = 0;
11504 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
11506 /* The red-zone begins below the return address. */
11507 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
11509 /* When the register save area is in the aligned portion of
11510 the stack, determine the maximum runtime displacement that
11511 matches up with the aligned frame. */
11512 if (stack_realign_drap)
11513 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
11514 + UNITS_PER_WORD);
11517 /* Special care must be taken for the normal return case of a function
11518 using eh_return: the eax and edx registers are marked as saved, but
11519 not restored along this path. Adjust the save location to match. */
11520 if (crtl->calls_eh_return && style != 2)
11521 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
11523 /* EH_RETURN requires the use of moves to function properly. */
11524 if (crtl->calls_eh_return)
11525 restore_regs_via_mov = true;
11526 /* SEH requires the use of pops to identify the epilogue. */
11527 else if (TARGET_SEH)
11528 restore_regs_via_mov = false;
11529 /* If we're only restoring one register and sp is not valid, then
11530 use a move instruction to restore the register, since it's
11531 less work than reloading sp and popping the register. */
11532 else if (!m->fs.sp_valid && frame.nregs <= 1)
11533 restore_regs_via_mov = true;
11534 else if (TARGET_EPILOGUE_USING_MOVE
11535 && cfun->machine->use_fast_prologue_epilogue
11536 && (frame.nregs > 1
11537 || m->fs.sp_offset != frame.reg_save_offset))
11538 restore_regs_via_mov = true;
11539 else if (frame_pointer_needed
11540 && !frame.nregs
11541 && m->fs.sp_offset != frame.reg_save_offset)
11542 restore_regs_via_mov = true;
11543 else if (frame_pointer_needed
11544 && TARGET_USE_LEAVE
11545 && cfun->machine->use_fast_prologue_epilogue
11546 && frame.nregs == 1)
11547 restore_regs_via_mov = true;
11548 else
11549 restore_regs_via_mov = false;
11551 if (restore_regs_via_mov || frame.nsseregs)
11553 /* Ensure that the entire register save area is addressable via
11554 the stack pointer, if we will restore via sp. */
11555 if (TARGET_64BIT
11556 && m->fs.sp_offset > 0x7fffffff
11557 && !(m->fs.fp_valid || m->fs.drap_valid)
11558 && (frame.nsseregs + frame.nregs) != 0)
11560 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11561 GEN_INT (m->fs.sp_offset
11562 - frame.sse_reg_save_offset),
11563 style,
11564 m->fs.cfa_reg == stack_pointer_rtx);
11568 /* If there are any SSE registers to restore, then we have to do it
11569 via moves, since there's obviously no pop for SSE regs. */
11570 if (frame.nsseregs)
11571 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
11572 style == 2);
11574 if (restore_regs_via_mov)
11576 rtx t;
11578 if (frame.nregs)
11579 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
11581 /* eh_return epilogues need %ecx added to the stack pointer. */
11582 if (style == 2)
11584 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
11586 /* Stack align doesn't work with eh_return. */
11587 gcc_assert (!stack_realign_drap);
11588 /* Neither do regparm nested functions. */
11589 gcc_assert (!ix86_static_chain_on_stack);
11591 if (frame_pointer_needed)
11593 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
11594 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
11595 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
11597 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
11598 insn = emit_move_insn (hard_frame_pointer_rtx, t);
11600 /* Note that we use SA as a temporary CFA, as the return
11601 address is at the proper place relative to it. We
11602 pretend this happens at the FP restore insn because
11603 prior to this insn the FP would be stored at the wrong
11604 offset relative to SA, and after this insn we have no
11605 other reasonable register to use for the CFA. We don't
11606 bother resetting the CFA to the SP for the duration of
11607 the return insn. */
11608 add_reg_note (insn, REG_CFA_DEF_CFA,
11609 plus_constant (Pmode, sa, UNITS_PER_WORD));
11610 ix86_add_queued_cfa_restore_notes (insn);
11611 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
11612 RTX_FRAME_RELATED_P (insn) = 1;
11614 m->fs.cfa_reg = sa;
11615 m->fs.cfa_offset = UNITS_PER_WORD;
11616 m->fs.fp_valid = false;
11618 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
11619 const0_rtx, style, false);
11621 else
11623 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
11624 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
11625 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
11626 ix86_add_queued_cfa_restore_notes (insn);
11628 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
11629 if (m->fs.cfa_offset != UNITS_PER_WORD)
11631 m->fs.cfa_offset = UNITS_PER_WORD;
11632 add_reg_note (insn, REG_CFA_DEF_CFA,
11633 plus_constant (Pmode, stack_pointer_rtx,
11634 UNITS_PER_WORD));
11635 RTX_FRAME_RELATED_P (insn) = 1;
11638 m->fs.sp_offset = UNITS_PER_WORD;
11639 m->fs.sp_valid = true;
11642 else
11644 /* SEH requires that the function end with (1) a stack adjustment
11645 if necessary, (2) a sequence of pops, and (3) a return or
11646 jump instruction. Prevent insns from the function body from
11647 being scheduled into this sequence. */
11648 if (TARGET_SEH)
11650 /* Prevent a catch region from being adjacent to the standard
11651 epilogue sequence. Unfortunately, neither crtl->uses_eh_lsda nor
11652 several other flags that would be interesting to test are
11653 set up yet. */
11654 if (flag_non_call_exceptions)
11655 emit_insn (gen_nops (const1_rtx));
11656 else
11657 emit_insn (gen_blockage ());
11660 /* First step is to deallocate the stack frame so that we can
11661 pop the registers. Also do it on SEH target for very large
11662 frame as the emitted instructions aren't allowed by the ABI in
11663 epilogues. */
11664 if (!m->fs.sp_valid
11665 || (TARGET_SEH
11666 && (m->fs.sp_offset - frame.reg_save_offset
11667 >= SEH_MAX_FRAME_SIZE)))
11669 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
11670 GEN_INT (m->fs.fp_offset
11671 - frame.reg_save_offset),
11672 style, false);
11674 else if (m->fs.sp_offset != frame.reg_save_offset)
11676 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11677 GEN_INT (m->fs.sp_offset
11678 - frame.reg_save_offset),
11679 style,
11680 m->fs.cfa_reg == stack_pointer_rtx);
11683 ix86_emit_restore_regs_using_pop ();
11686 /* If we used a frame pointer and haven't already got rid of it,
11687 then do so now. */
11688 if (m->fs.fp_valid)
11690 /* If the stack pointer is valid and pointing at the frame
11691 pointer store address, then we only need a pop. */
11692 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
11693 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11694 /* Leave results in shorter dependency chains on CPUs that are
11695 able to grok it fast. */
11696 else if (TARGET_USE_LEAVE
11697 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
11698 || !cfun->machine->use_fast_prologue_epilogue)
11699 ix86_emit_leave ();
11700 else
11702 pro_epilogue_adjust_stack (stack_pointer_rtx,
11703 hard_frame_pointer_rtx,
11704 const0_rtx, style, !using_drap);
11705 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11709 if (using_drap)
11711 int param_ptr_offset = UNITS_PER_WORD;
11712 rtx insn;
11714 gcc_assert (stack_realign_drap);
11716 if (ix86_static_chain_on_stack)
11717 param_ptr_offset += UNITS_PER_WORD;
11718 if (!call_used_regs[REGNO (crtl->drap_reg)])
11719 param_ptr_offset += UNITS_PER_WORD;
11721 insn = emit_insn (gen_rtx_SET
11722 (VOIDmode, stack_pointer_rtx,
11723 gen_rtx_PLUS (Pmode,
11724 crtl->drap_reg,
11725 GEN_INT (-param_ptr_offset))));
11726 m->fs.cfa_reg = stack_pointer_rtx;
11727 m->fs.cfa_offset = param_ptr_offset;
11728 m->fs.sp_offset = param_ptr_offset;
11729 m->fs.realigned = false;
11731 add_reg_note (insn, REG_CFA_DEF_CFA,
11732 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11733 GEN_INT (param_ptr_offset)));
11734 RTX_FRAME_RELATED_P (insn) = 1;
11736 if (!call_used_regs[REGNO (crtl->drap_reg)])
11737 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
11740 /* At this point the stack pointer must be valid, and we must have
11741 restored all of the registers. We may not have deallocated the
11742 entire stack frame. We've delayed this until now because it may
11743 be possible to merge the local stack deallocation with the
11744 deallocation forced by ix86_static_chain_on_stack. */
11745 gcc_assert (m->fs.sp_valid);
11746 gcc_assert (!m->fs.fp_valid);
11747 gcc_assert (!m->fs.realigned);
11748 if (m->fs.sp_offset != UNITS_PER_WORD)
11750 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11751 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
11752 style, true);
11754 else
11755 ix86_add_queued_cfa_restore_notes (get_last_insn ());
11757 /* Sibcall epilogues don't want a return instruction. */
11758 if (style == 0)
11760 m->fs = frame_state_save;
11761 return;
11764 if (crtl->args.pops_args && crtl->args.size)
11766 rtx popc = GEN_INT (crtl->args.pops_args);
11768 /* i386 can only pop 64K bytes. If asked to pop more, pop the return
11769 address, do an explicit add, and jump indirectly to the caller. */
11771 if (crtl->args.pops_args >= 65536)
11773 rtx ecx = gen_rtx_REG (SImode, CX_REG);
11774 rtx insn;
11776 /* There is no "pascal" calling convention in any 64-bit ABI. */
11777 gcc_assert (!TARGET_64BIT);
11779 insn = emit_insn (gen_pop (ecx));
11780 m->fs.cfa_offset -= UNITS_PER_WORD;
11781 m->fs.sp_offset -= UNITS_PER_WORD;
11783 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11784 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
11785 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11786 add_reg_note (insn, REG_CFA_REGISTER,
11787 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
11788 RTX_FRAME_RELATED_P (insn) = 1;
11790 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11791 popc, -1, true);
11792 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
11794 else
11795 emit_jump_insn (gen_simple_return_pop_internal (popc));
11797 else
11798 emit_jump_insn (gen_simple_return_internal ());
11800 /* Restore the state back to the state from the prologue,
11801 so that it's correct for the next epilogue. */
11802 m->fs = frame_state_save;
11805 /* Reset from the function's potential modifications. */
11807 static void
11808 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, HOST_WIDE_INT)
11810 if (pic_offset_table_rtx)
11811 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
11812 #if TARGET_MACHO
11813 /* Mach-O doesn't support labels at the end of objects, so if
11814 it looks like we might want one, insert a NOP. */
11816 rtx_insn *insn = get_last_insn ();
11817 rtx_insn *deleted_debug_label = NULL;
11818 while (insn
11819 && NOTE_P (insn)
11820 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
11822 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
11823 notes; instead set their CODE_LABEL_NUMBER to -1,
11824 otherwise there would be code generation differences
11825 between -g and -g0. */
11826 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
11827 deleted_debug_label = insn;
11828 insn = PREV_INSN (insn);
11830 if (insn
11831 && (LABEL_P (insn)
11832 || (NOTE_P (insn)
11833 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
11834 fputs ("\tnop\n", file);
11835 else if (deleted_debug_label)
11836 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
11837 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
11838 CODE_LABEL_NUMBER (insn) = -1;
11840 #endif
11844 /* Return a scratch register to use in the split stack prologue. The
11845 split stack prologue is used for -fsplit-stack. It is the first
11846 instructions in the function, even before the regular prologue.
11847 The scratch register can be any caller-saved register which is not
11848 used for parameters or for the static chain. */
11850 static unsigned int
11851 split_stack_prologue_scratch_regno (void)
11853 if (TARGET_64BIT)
11854 return R11_REG;
11855 else
11857 bool is_fastcall, is_thiscall;
11858 int regparm;
11860 is_fastcall = (lookup_attribute ("fastcall",
11861 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11862 != NULL);
11863 is_thiscall = (lookup_attribute ("thiscall",
11864 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11865 != NULL);
11866 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
11868 if (is_fastcall)
11870 if (DECL_STATIC_CHAIN (cfun->decl))
11872 sorry ("-fsplit-stack does not support fastcall with "
11873 "nested function");
11874 return INVALID_REGNUM;
11876 return AX_REG;
11878 else if (is_thiscall)
11880 if (!DECL_STATIC_CHAIN (cfun->decl))
11881 return DX_REG;
11882 return AX_REG;
11884 else if (regparm < 3)
11886 if (!DECL_STATIC_CHAIN (cfun->decl))
11887 return CX_REG;
11888 else
11890 if (regparm >= 2)
11892 sorry ("-fsplit-stack does not support 2 register "
11893 "parameters for a nested function");
11894 return INVALID_REGNUM;
11896 return DX_REG;
11899 else
11901 /* FIXME: We could make this work by pushing a register
11902 around the addition and comparison. */
11903 sorry ("-fsplit-stack does not support 3 register parameters");
11904 return INVALID_REGNUM;
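/* In summary, the choices made above are:
     64-bit                        -> r11
     fastcall, no static chain     -> eax (a static chain is rejected)
     thiscall                      -> edx, or eax with a static chain
     regparm < 3                   -> ecx, or edx with a static chain
                                      (regparm >= 2 plus a chain is rejected)
     three register parameters     -> unsupported for now.  */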
11909 /* A SYMBOL_REF for the function which allocates new stack space for
11910 -fsplit-stack. */
11912 static GTY(()) rtx split_stack_fn;
11914 /* A SYMBOL_REF for the more stack function when using the large
11915 model. */
11917 static GTY(()) rtx split_stack_fn_large;
11919 /* Handle -fsplit-stack. These are the first instructions in the
11920 function, even before the regular prologue. */
11922 void
11923 ix86_expand_split_stack_prologue (void)
11925 struct ix86_frame frame;
11926 HOST_WIDE_INT allocate;
11927 unsigned HOST_WIDE_INT args_size;
11928 rtx_code_label *label;
11929 rtx limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
11930 rtx scratch_reg = NULL_RTX;
11931 rtx_code_label *varargs_label = NULL;
11932 rtx fn;
11934 gcc_assert (flag_split_stack && reload_completed);
11936 ix86_finalize_stack_realign_flags ();
11937 ix86_compute_frame_layout (&frame);
11938 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
11940 /* This is the label we will branch to if we have enough stack
11941 space. We expect the basic block reordering pass to reverse this
11942 branch if optimizing, so that we branch in the unlikely case. */
11943 label = gen_label_rtx ();
11945 /* We need to compare the stack pointer minus the frame size with
11946 the stack boundary in the TCB. The stack boundary always gives
11947 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
11948 can compare directly. Otherwise we need to do an addition. */
11950 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
11951 UNSPEC_STACK_CHECK);
11952 limit = gen_rtx_CONST (Pmode, limit);
11953 limit = gen_rtx_MEM (Pmode, limit);
11954 if (allocate < SPLIT_STACK_AVAILABLE)
11955 current = stack_pointer_rtx;
11956 else
11958 unsigned int scratch_regno;
11959 rtx offset;
11961 /* We need a scratch register to hold the stack pointer minus
11962 the required frame size. Since this is the very start of the
11963 function, the scratch register can be any caller-saved
11964 register which is not used for parameters. */
11965 offset = GEN_INT (- allocate);
11966 scratch_regno = split_stack_prologue_scratch_regno ();
11967 if (scratch_regno == INVALID_REGNUM)
11968 return;
11969 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11970 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
11972 /* We don't use ix86_gen_add3 in this case because it will
11973 want to split to lea, but when not optimizing the insn
11974 will not be split after this point. */
11975 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11976 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11977 offset)));
11979 else
11981 emit_move_insn (scratch_reg, offset);
11982 emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
11983 stack_pointer_rtx));
11985 current = scratch_reg;
11988 ix86_expand_branch (GEU, current, limit, label);
11989 jump_insn = get_last_insn ();
11990 JUMP_LABEL (jump_insn) = label;
11992 /* Mark the jump as very likely to be taken. */
11993 add_int_reg_note (jump_insn, REG_BR_PROB,
11994 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
11996 if (split_stack_fn == NULL_RTX)
11997 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
11998 fn = split_stack_fn;
12000 /* Get more stack space. We pass in the desired stack space and the
12001 size of the arguments to copy to the new stack. In 32-bit mode
12002 we push the parameters; __morestack will return on a new stack
12003 anyhow. In 64-bit mode we pass the parameters in r10 and
12004 r11. */
12005 allocate_rtx = GEN_INT (allocate);
12006 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
12007 call_fusage = NULL_RTX;
12008 if (TARGET_64BIT)
12010 rtx reg10, reg11;
12012 reg10 = gen_rtx_REG (Pmode, R10_REG);
12013 reg11 = gen_rtx_REG (Pmode, R11_REG);
12015 /* If this function uses a static chain, it will be in %r10.
12016 Preserve it across the call to __morestack. */
12017 if (DECL_STATIC_CHAIN (cfun->decl))
12019 rtx rax;
12021 rax = gen_rtx_REG (word_mode, AX_REG);
12022 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
12023 use_reg (&call_fusage, rax);
12026 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
12027 && !TARGET_PECOFF)
12029 HOST_WIDE_INT argval;
12031 gcc_assert (Pmode == DImode);
12032 /* When using the large model we need to load the address
12033 into a register, and we've run out of registers. So we
12034 switch to a different calling convention, and we call a
12035 different function: __morestack_large_model. We pass the
12036 argument size in the upper 32 bits of r10 and pass the
12037 frame size in the lower 32 bits. */
12038 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
12039 gcc_assert ((args_size & 0xffffffff) == args_size);
12041 if (split_stack_fn_large == NULL_RTX)
12042 split_stack_fn_large =
12043 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
12045 if (ix86_cmodel == CM_LARGE_PIC)
12047 rtx_code_label *label;
12048 rtx x;
12050 label = gen_label_rtx ();
12051 emit_label (label);
12052 LABEL_PRESERVE_P (label) = 1;
12053 emit_insn (gen_set_rip_rex64 (reg10, label));
12054 emit_insn (gen_set_got_offset_rex64 (reg11, label));
12055 emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
12056 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
12057 UNSPEC_GOT);
12058 x = gen_rtx_CONST (Pmode, x);
12059 emit_move_insn (reg11, x);
12060 x = gen_rtx_PLUS (Pmode, reg10, reg11);
12061 x = gen_const_mem (Pmode, x);
12062 emit_move_insn (reg11, x);
12064 else
12065 emit_move_insn (reg11, split_stack_fn_large);
12067 fn = reg11;
12069 argval = ((args_size << 16) << 16) + allocate;
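/* Worked example with illustrative values: args_size == 0x20 and
   allocate == 0x1000 give argval == 0x0000002000001000, i.e. r10
   carries the argument size in its upper 32 bits and the frame size
   in its lower 32 bits, as described above.  */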
12070 emit_move_insn (reg10, GEN_INT (argval));
12072 else
12074 emit_move_insn (reg10, allocate_rtx);
12075 emit_move_insn (reg11, GEN_INT (args_size));
12076 use_reg (&call_fusage, reg11);
12079 use_reg (&call_fusage, reg10);
12081 else
12083 emit_insn (gen_push (GEN_INT (args_size)));
12084 emit_insn (gen_push (allocate_rtx));
12086 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
12087 GEN_INT (UNITS_PER_WORD), constm1_rtx,
12088 NULL_RTX, false);
12089 add_function_usage_to (call_insn, call_fusage);
12091 /* In order to make call/return prediction work right, we now need
12092 to execute a return instruction. See
12093 libgcc/config/i386/morestack.S for the details on how this works.
12095 For flow purposes gcc must not see this as a return
12096 instruction--we need control flow to continue at the subsequent
12097 label. Therefore, we use an unspec. */
12098 gcc_assert (crtl->args.pops_args < 65536);
12099 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
12101 /* If we are in 64-bit mode and this function uses a static chain,
12102 we saved %r10 in %rax before calling __morestack. */
12103 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
12104 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
12105 gen_rtx_REG (word_mode, AX_REG));
12107 /* If this function calls va_start, we need to store a pointer to
12108 the arguments on the old stack, because they may not have been
12109 all copied to the new stack. At this point the old stack can be
12110 found at the frame pointer value used by __morestack, because
12111 __morestack has set that up before calling back to us. Here we
12112 store that pointer in a scratch register, and in
12113 ix86_expand_prologue we store the scratch register in a stack
12114 slot. */
12115 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12117 unsigned int scratch_regno;
12118 rtx frame_reg;
12119 int words;
12121 scratch_regno = split_stack_prologue_scratch_regno ();
12122 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12123 frame_reg = gen_rtx_REG (Pmode, BP_REG);
12125 /* 64-bit:
12126 fp -> old fp value
12127 return address within this function
12128 return address of caller of this function
12129 stack arguments
12130 So we add three words to get to the stack arguments.
12132 32-bit:
12133 fp -> old fp value
12134 return address within this function
12135 first argument to __morestack
12136 second argument to __morestack
12137 return address of caller of this function
12138 stack arguments
12139 So we add five words to get to the stack arguments. */
12141 words = TARGET_64BIT ? 3 : 5;
12142 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12143 gen_rtx_PLUS (Pmode, frame_reg,
12144 GEN_INT (words * UNITS_PER_WORD))));
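/* For example, in 64-bit mode words == 3 and UNITS_PER_WORD == 8, so the
   scratch register ends up holding fp + 24, the address of the first
   stack argument in the layout sketched above.  */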
12146 varargs_label = gen_label_rtx ();
12147 emit_jump_insn (gen_jump (varargs_label));
12148 JUMP_LABEL (get_last_insn ()) = varargs_label;
12150 emit_barrier ();
12153 emit_label (label);
12154 LABEL_NUSES (label) = 1;
12156 /* If this function calls va_start, we now have to set the scratch
12157 register for the case where we do not call __morestack. In this
12158 case we need to set it based on the stack pointer. */
12159 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12161 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12162 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12163 GEN_INT (UNITS_PER_WORD))));
12165 emit_label (varargs_label);
12166 LABEL_NUSES (varargs_label) = 1;
12170 /* We may have to tell the dataflow pass that the split stack prologue
12171 is initializing a scratch register. */
12173 static void
12174 ix86_live_on_entry (bitmap regs)
12176 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12178 gcc_assert (flag_split_stack);
12179 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
12183 /* Extract the parts of an RTL expression that is a valid memory address
12184 for an instruction. Return 0 if the structure of the address is
12185 grossly off. Return -1 if the address contains ASHIFT, so it is not
12186 strictly valid, but still used for computing length of lea instruction. */
12189 ix86_decompose_address (rtx addr, struct ix86_address *out)
12191 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
12192 rtx base_reg, index_reg;
12193 HOST_WIDE_INT scale = 1;
12194 rtx scale_rtx = NULL_RTX;
12195 rtx tmp;
12196 int retval = 1;
12197 enum ix86_address_seg seg = SEG_DEFAULT;
12199 /* Allow zero-extended SImode addresses;
12200 they will be emitted with the addr32 prefix. */
12201 if (TARGET_64BIT && GET_MODE (addr) == DImode)
12203 if (GET_CODE (addr) == ZERO_EXTEND
12204 && GET_MODE (XEXP (addr, 0)) == SImode)
12206 addr = XEXP (addr, 0);
12207 if (CONST_INT_P (addr))
12208 return 0;
12210 else if (GET_CODE (addr) == AND
12211 && const_32bit_mask (XEXP (addr, 1), DImode))
12213 addr = simplify_gen_subreg (SImode, XEXP (addr, 0), DImode, 0);
12214 if (addr == NULL_RTX)
12215 return 0;
12217 if (CONST_INT_P (addr))
12218 return 0;
12222 /* Allow SImode subregs of DImode addresses;
12223 they will be emitted with the addr32 prefix. */
12224 if (TARGET_64BIT && GET_MODE (addr) == SImode)
12226 if (GET_CODE (addr) == SUBREG
12227 && GET_MODE (SUBREG_REG (addr)) == DImode)
12229 addr = SUBREG_REG (addr);
12230 if (CONST_INT_P (addr))
12231 return 0;
12235 if (REG_P (addr))
12236 base = addr;
12237 else if (GET_CODE (addr) == SUBREG)
12239 if (REG_P (SUBREG_REG (addr)))
12240 base = addr;
12241 else
12242 return 0;
12244 else if (GET_CODE (addr) == PLUS)
12246 rtx addends[4], op;
12247 int n = 0, i;
12249 op = addr;
12252 if (n >= 4)
12253 return 0;
12254 addends[n++] = XEXP (op, 1);
12255 op = XEXP (op, 0);
12257 while (GET_CODE (op) == PLUS);
12258 if (n >= 4)
12259 return 0;
12260 addends[n] = op;
12262 for (i = n; i >= 0; --i)
12264 op = addends[i];
12265 switch (GET_CODE (op))
12267 case MULT:
12268 if (index)
12269 return 0;
12270 index = XEXP (op, 0);
12271 scale_rtx = XEXP (op, 1);
12272 break;
12274 case ASHIFT:
12275 if (index)
12276 return 0;
12277 index = XEXP (op, 0);
12278 tmp = XEXP (op, 1);
12279 if (!CONST_INT_P (tmp))
12280 return 0;
12281 scale = INTVAL (tmp);
12282 if ((unsigned HOST_WIDE_INT) scale > 3)
12283 return 0;
12284 scale = 1 << scale;
12285 break;
12287 case ZERO_EXTEND:
12288 op = XEXP (op, 0);
12289 if (GET_CODE (op) != UNSPEC)
12290 return 0;
12291 /* FALLTHRU */
12293 case UNSPEC:
12294 if (XINT (op, 1) == UNSPEC_TP
12295 && TARGET_TLS_DIRECT_SEG_REFS
12296 && seg == SEG_DEFAULT)
12297 seg = DEFAULT_TLS_SEG_REG;
12298 else
12299 return 0;
12300 break;
12302 case SUBREG:
12303 if (!REG_P (SUBREG_REG (op)))
12304 return 0;
12305 /* FALLTHRU */
12307 case REG:
12308 if (!base)
12309 base = op;
12310 else if (!index)
12311 index = op;
12312 else
12313 return 0;
12314 break;
12316 case CONST:
12317 case CONST_INT:
12318 case SYMBOL_REF:
12319 case LABEL_REF:
12320 if (disp)
12321 return 0;
12322 disp = op;
12323 break;
12325 default:
12326 return 0;
12330 else if (GET_CODE (addr) == MULT)
12332 index = XEXP (addr, 0); /* index*scale */
12333 scale_rtx = XEXP (addr, 1);
12335 else if (GET_CODE (addr) == ASHIFT)
12337 /* We're called for lea too, which implements ashift on occasion. */
12338 index = XEXP (addr, 0);
12339 tmp = XEXP (addr, 1);
12340 if (!CONST_INT_P (tmp))
12341 return 0;
12342 scale = INTVAL (tmp);
12343 if ((unsigned HOST_WIDE_INT) scale > 3)
12344 return 0;
12345 scale = 1 << scale;
12346 retval = -1;
12348 else
12349 disp = addr; /* displacement */
12351 if (index)
12353 if (REG_P (index))
12355 else if (GET_CODE (index) == SUBREG
12356 && REG_P (SUBREG_REG (index)))
12358 else
12359 return 0;
12362 /* Extract the integral value of scale. */
12363 if (scale_rtx)
12365 if (!CONST_INT_P (scale_rtx))
12366 return 0;
12367 scale = INTVAL (scale_rtx);
12370 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
12371 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
12373 /* Avoid useless 0 displacement. */
12374 if (disp == const0_rtx && (base || index))
12375 disp = NULL_RTX;
12377 /* Allow the arg pointer and stack pointer as index if there is no scaling. */
12378 if (base_reg && index_reg && scale == 1
12379 && (index_reg == arg_pointer_rtx
12380 || index_reg == frame_pointer_rtx
12381 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
12383 rtx tmp;
12384 tmp = base, base = index, index = tmp;
12385 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
12388 /* Special case: %ebp cannot be encoded as a base without a displacement.
12389 Similarly %r13. */
12390 if (!disp
12391 && base_reg
12392 && (base_reg == hard_frame_pointer_rtx
12393 || base_reg == frame_pointer_rtx
12394 || base_reg == arg_pointer_rtx
12395 || (REG_P (base_reg)
12396 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
12397 || REGNO (base_reg) == R13_REG))))
12398 disp = const0_rtx;
12400 /* Special case: on K6, [%esi] causes the instruction to be vector decoded.
12401 Avoid this by transforming to [%esi+0].
12402 Reload calls address legitimization without cfun defined, so we need
12403 to test cfun for being non-NULL. */
12404 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
12405 && base_reg && !index_reg && !disp
12406 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
12407 disp = const0_rtx;
12409 /* Special case: encode reg+reg instead of reg*2. */
12410 if (!base && index && scale == 2)
12411 base = index, base_reg = index_reg, scale = 1;
12413 /* Special case: scaling cannot be encoded without base or displacement. */
12414 if (!base && !disp && index && scale != 1)
12415 disp = const0_rtx;
12417 out->base = base;
12418 out->index = index;
12419 out->disp = disp;
12420 out->scale = scale;
12421 out->seg = seg;
12423 return retval;
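
/* Illustrative sketch (not part of i386.c): a standalone model of the
   base + index*scale + disp form that the decomposition above produces,
   restating the encoding rules it works with.  The struct and function names
   are hypothetical; only the rules (scale must be 1, 2, 4 or 8, a scale needs
   an index, the stack pointer cannot be a scaled index, and %ebp or %r13 as a
   base needs a displacement) come from the code above.  */

#include <stdbool.h>

struct x86_addr_sketch
{
  bool has_base;
  bool has_index;
  bool has_disp;
  int scale;                /* Meaningful only when has_index is set.  */
  bool index_is_sp;         /* %esp/%rsp cannot be used as an index.  */
  bool base_is_bp_or_r13;   /* Such a base needs an explicit displacement.  */
};

/* Return true if ADDR is encodable as written.  ix86_decompose_address fixes
   up some failing cases (swapping base and index, or supplying a zero
   displacement), while ix86_legitimate_address_p rejects the rest.  */
static bool
x86_addr_sketch_encodable_p (const struct x86_addr_sketch *addr)
{
  if (addr->has_index
      && addr->scale != 1 && addr->scale != 2
      && addr->scale != 4 && addr->scale != 8)
    return false;
  if (!addr->has_index && addr->scale != 1)
    return false;                       /* Scale without index.  */
  if (addr->has_index && addr->index_is_sp)
    return false;                       /* SP can only be a base.  */
  if (addr->has_base && addr->base_is_bp_or_r13 && !addr->has_disp)
    return false;                       /* Needs a (possibly zero) disp.  */
  return true;
}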
12426 /* Return cost of the memory address x.
12427 For i386, it is better to use a complex address than let gcc copy
12428 the address into a reg and make a new pseudo. But not if the address
12429 requires two regs - that would mean more pseudos with longer
12430 lifetimes. */
12431 static int
12432 ix86_address_cost (rtx x, enum machine_mode, addr_space_t, bool)
12434 struct ix86_address parts;
12435 int cost = 1;
12436 int ok = ix86_decompose_address (x, &parts);
12438 gcc_assert (ok);
12440 if (parts.base && GET_CODE (parts.base) == SUBREG)
12441 parts.base = SUBREG_REG (parts.base);
12442 if (parts.index && GET_CODE (parts.index) == SUBREG)
12443 parts.index = SUBREG_REG (parts.index);
12445 /* Attempt to minimize number of registers in the address. */
12446 if ((parts.base
12447 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
12448 || (parts.index
12449 && (!REG_P (parts.index)
12450 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
12451 cost++;
12453 if (parts.base
12454 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
12455 && parts.index
12456 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
12457 && parts.base != parts.index)
12458 cost++;
12460 /* The AMD K6 doesn't like addresses with the ModR/M byte set to 00_xxx_100b,
12461 since its predecode logic can't detect the length of such instructions
12462 and decoding degenerates to the vector decoder. Increase the cost of such
12463 addresses here. The penalty is at least 2 cycles. It may be worthwhile
12464 to split such addresses or even refuse them altogether.
12466 The following addressing modes are affected:
12467 [base+scale*index]
12468 [scale*index+disp]
12469 [base+index]
12471 The first and last cases may be avoided by explicitly coding a zero
12472 displacement into the memory address, but I don't have an AMD K6 machine
12473 handy to check this theory. */
12475 if (TARGET_K6
12476 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
12477 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
12478 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
12479 cost += 10;
12481 return cost;
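
/* Illustrative sketch (not part of i386.c): the shape of the cost heuristic
   above as a standalone function.  The names are hypothetical; the constants
   (base cost 1, +1 when a present part is not a hard register, a further +1
   when both parts are, and +10 for the K6-problematic forms) mirror the code,
   although the real code also requires base != index for the second
   increment.  */

#include <stdbool.h>

static int
x86_addr_cost_sketch (bool base_not_hard_reg, bool index_not_hard_reg,
                      bool has_base, bool has_index, bool has_disp,
                      int scale, bool target_k6)
{
  int cost = 1;

  if ((has_base && base_not_hard_reg) || (has_index && index_not_hard_reg))
    cost++;
  if (has_base && base_not_hard_reg && has_index && index_not_hard_reg)
    cost++;

  /* AMD K6: a ModR/M byte of 00_xxx_100b defeats the predecoder, so
     penalize [base+scale*index], [scale*index+disp] and [base+index].  */
  if (target_k6
      && ((!has_disp && has_base && has_index && scale != 1)
          || (has_disp && !has_base && has_index && scale != 1)
          || (!has_disp && has_base && has_index && scale == 1)))
    cost += 10;

  return cost;
}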
12484 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
12485 this is used to form addresses to local data when -fPIC is in
12486 use. */
12488 static bool
12489 darwin_local_data_pic (rtx disp)
12491 return (GET_CODE (disp) == UNSPEC
12492 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
12495 /* Determine if a given RTX is a valid constant. We already know this
12496 satisfies CONSTANT_P. */
12498 static bool
12499 ix86_legitimate_constant_p (enum machine_mode, rtx x)
12501 switch (GET_CODE (x))
12503 case CONST:
12504 x = XEXP (x, 0);
12506 if (GET_CODE (x) == PLUS)
12508 if (!CONST_INT_P (XEXP (x, 1)))
12509 return false;
12510 x = XEXP (x, 0);
12513 if (TARGET_MACHO && darwin_local_data_pic (x))
12514 return true;
12516 /* Only some unspecs are valid as "constants". */
12517 if (GET_CODE (x) == UNSPEC)
12518 switch (XINT (x, 1))
12520 case UNSPEC_GOT:
12521 case UNSPEC_GOTOFF:
12522 case UNSPEC_PLTOFF:
12523 return TARGET_64BIT;
12524 case UNSPEC_TPOFF:
12525 case UNSPEC_NTPOFF:
12526 x = XVECEXP (x, 0, 0);
12527 return (GET_CODE (x) == SYMBOL_REF
12528 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
12529 case UNSPEC_DTPOFF:
12530 x = XVECEXP (x, 0, 0);
12531 return (GET_CODE (x) == SYMBOL_REF
12532 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
12533 default:
12534 return false;
12537 /* We must have drilled down to a symbol. */
12538 if (GET_CODE (x) == LABEL_REF)
12539 return true;
12540 if (GET_CODE (x) != SYMBOL_REF)
12541 return false;
12542 /* FALLTHRU */
12544 case SYMBOL_REF:
12545 /* TLS symbols are never valid. */
12546 if (SYMBOL_REF_TLS_MODEL (x))
12547 return false;
12549 /* DLLIMPORT symbols are never valid. */
12550 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12551 && SYMBOL_REF_DLLIMPORT_P (x))
12552 return false;
12554 #if TARGET_MACHO
12555 /* mdynamic-no-pic */
12556 if (MACHO_DYNAMIC_NO_PIC_P)
12557 return machopic_symbol_defined_p (x);
12558 #endif
12559 break;
12561 case CONST_DOUBLE:
12562 if (GET_MODE (x) == TImode
12563 && x != CONST0_RTX (TImode)
12564 && !TARGET_64BIT)
12565 return false;
12566 break;
12568 case CONST_VECTOR:
12569 if (!standard_sse_constant_p (x))
12570 return false;
12572 default:
12573 break;
12576 /* Otherwise we handle everything else in the move patterns. */
12577 return true;
12580 /* Determine if it's legal to put X into the constant pool. This
12581 is not possible for the address of thread-local symbols, which
12582 is checked above. */
12584 static bool
12585 ix86_cannot_force_const_mem (enum machine_mode mode, rtx x)
12587 /* We can always put integral constants and vectors in memory. */
12588 switch (GET_CODE (x))
12590 case CONST_INT:
12591 case CONST_DOUBLE:
12592 case CONST_VECTOR:
12593 return false;
12595 default:
12596 break;
12598 return !ix86_legitimate_constant_p (mode, x);
12601 /* Return true if the symbol is marked as dllimport or as a stub variable,
12602 otherwise false. */
12604 static bool
12605 is_imported_p (rtx x)
12607 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
12608 || GET_CODE (x) != SYMBOL_REF)
12609 return false;
12611 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
12615 /* Nonzero if the constant value X is a legitimate general operand
12616 when generating PIC code. It is given that flag_pic is on and
12617 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
12619 bool
12620 legitimate_pic_operand_p (rtx x)
12622 rtx inner;
12624 switch (GET_CODE (x))
12626 case CONST:
12627 inner = XEXP (x, 0);
12628 if (GET_CODE (inner) == PLUS
12629 && CONST_INT_P (XEXP (inner, 1)))
12630 inner = XEXP (inner, 0);
12632 /* Only some unspecs are valid as "constants". */
12633 if (GET_CODE (inner) == UNSPEC)
12634 switch (XINT (inner, 1))
12636 case UNSPEC_GOT:
12637 case UNSPEC_GOTOFF:
12638 case UNSPEC_PLTOFF:
12639 return TARGET_64BIT;
12640 case UNSPEC_TPOFF:
12641 x = XVECEXP (inner, 0, 0);
12642 return (GET_CODE (x) == SYMBOL_REF
12643 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
12644 case UNSPEC_MACHOPIC_OFFSET:
12645 return legitimate_pic_address_disp_p (x);
12646 default:
12647 return false;
12649 /* FALLTHRU */
12651 case SYMBOL_REF:
12652 case LABEL_REF:
12653 return legitimate_pic_address_disp_p (x);
12655 default:
12656 return true;
12660 /* Determine if a given CONST RTX is a valid memory displacement
12661 in PIC mode. */
12663 bool
12664 legitimate_pic_address_disp_p (rtx disp)
12666 bool saw_plus;
12668 /* In 64bit mode we can allow direct addresses of symbols and labels
12669 when they are not dynamic symbols. */
12670 if (TARGET_64BIT)
12672 rtx op0 = disp, op1;
12674 switch (GET_CODE (disp))
12676 case LABEL_REF:
12677 return true;
12679 case CONST:
12680 if (GET_CODE (XEXP (disp, 0)) != PLUS)
12681 break;
12682 op0 = XEXP (XEXP (disp, 0), 0);
12683 op1 = XEXP (XEXP (disp, 0), 1);
12684 if (!CONST_INT_P (op1)
12685 || INTVAL (op1) >= 16*1024*1024
12686 || INTVAL (op1) < -16*1024*1024)
12687 break;
12688 if (GET_CODE (op0) == LABEL_REF)
12689 return true;
12690 if (GET_CODE (op0) == CONST
12691 && GET_CODE (XEXP (op0, 0)) == UNSPEC
12692 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
12693 return true;
12694 if (GET_CODE (op0) == UNSPEC
12695 && XINT (op0, 1) == UNSPEC_PCREL)
12696 return true;
12697 if (GET_CODE (op0) != SYMBOL_REF)
12698 break;
12699 /* FALLTHRU */
12701 case SYMBOL_REF:
12702 /* TLS references should always be enclosed in UNSPEC.
12703 A dllimported symbol always needs to be resolved. */
12704 if (SYMBOL_REF_TLS_MODEL (op0)
12705 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
12706 return false;
12708 if (TARGET_PECOFF)
12710 if (is_imported_p (op0))
12711 return true;
12713 if (SYMBOL_REF_FAR_ADDR_P (op0)
12714 || !SYMBOL_REF_LOCAL_P (op0))
12715 break;
12717 /* Function symbols need to be resolved only for
12718 the large model.
12719 For the small model we don't need to resolve anything
12720 here. */
12721 if ((ix86_cmodel != CM_LARGE_PIC
12722 && SYMBOL_REF_FUNCTION_P (op0))
12723 || ix86_cmodel == CM_SMALL_PIC)
12724 return true;
12725 /* Non-external symbols don't need to be resolved for
12726 the large and medium models. */
12727 if ((ix86_cmodel == CM_LARGE_PIC
12728 || ix86_cmodel == CM_MEDIUM_PIC)
12729 && !SYMBOL_REF_EXTERNAL_P (op0))
12730 return true;
12732 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
12733 && SYMBOL_REF_LOCAL_P (op0)
12734 && ix86_cmodel != CM_LARGE_PIC)
12735 return true;
12736 break;
12738 default:
12739 break;
12742 if (GET_CODE (disp) != CONST)
12743 return false;
12744 disp = XEXP (disp, 0);
12746 if (TARGET_64BIT)
12748 /* It is not safe to allow PLUS expressions here; this limits the allowed
12749 distance of GOT table references. We should not need these anyway. */
12750 if (GET_CODE (disp) != UNSPEC
12751 || (XINT (disp, 1) != UNSPEC_GOTPCREL
12752 && XINT (disp, 1) != UNSPEC_GOTOFF
12753 && XINT (disp, 1) != UNSPEC_PCREL
12754 && XINT (disp, 1) != UNSPEC_PLTOFF))
12755 return false;
12757 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
12758 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
12759 return false;
12760 return true;
12763 saw_plus = false;
12764 if (GET_CODE (disp) == PLUS)
12766 if (!CONST_INT_P (XEXP (disp, 1)))
12767 return false;
12768 disp = XEXP (disp, 0);
12769 saw_plus = true;
12772 if (TARGET_MACHO && darwin_local_data_pic (disp))
12773 return true;
12775 if (GET_CODE (disp) != UNSPEC)
12776 return false;
12778 switch (XINT (disp, 1))
12780 case UNSPEC_GOT:
12781 if (saw_plus)
12782 return false;
12783 /* We need to check for both symbols and labels because VxWorks loads
12784 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
12785 details. */
12786 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12787 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
12788 case UNSPEC_GOTOFF:
12789 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
12790 The ABI also specifies a 32-bit relocation, but we don't produce it in
12791 the small PIC model at all. */
12792 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12793 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
12794 && !TARGET_64BIT)
12795 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
12796 return false;
12797 case UNSPEC_GOTTPOFF:
12798 case UNSPEC_GOTNTPOFF:
12799 case UNSPEC_INDNTPOFF:
12800 if (saw_plus)
12801 return false;
12802 disp = XVECEXP (disp, 0, 0);
12803 return (GET_CODE (disp) == SYMBOL_REF
12804 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
12805 case UNSPEC_NTPOFF:
12806 disp = XVECEXP (disp, 0, 0);
12807 return (GET_CODE (disp) == SYMBOL_REF
12808 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
12809 case UNSPEC_DTPOFF:
12810 disp = XVECEXP (disp, 0, 0);
12811 return (GET_CODE (disp) == SYMBOL_REF
12812 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
12815 return false;
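
/* Illustrative sketch (not part of i386.c): the signed 16MB window applied
   above, and again in legitimize_pic_address below, when deciding whether a
   64-bit symbol+offset reference may be used directly.  The function name is
   hypothetical.  */

#include <stdbool.h>

static bool
pic_symbol_offset_in_range_sketch (long long offset)
{
  return offset >= -16LL * 1024 * 1024 && offset < 16LL * 1024 * 1024;
}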
12818 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
12819 replace the input X, or the original X if no replacement is called for.
12820 The output parameter *WIN is 1 if the calling macro should goto WIN,
12821 0 if it should not. */
12823 bool
12824 ix86_legitimize_reload_address (rtx x, enum machine_mode, int opnum, int type,
12825 int)
12827 /* Reload can generate:
12829 (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
12830 (reg:DI 97))
12831 (reg:DI 2 cx))
12833 This RTX is rejected by ix86_legitimate_address_p due to the
12834 non-strictness of base register 97. Following this rejection,
12835 reload pushes all three components into separate registers,
12836 creating an invalid memory address RTX.
12838 The following code reloads only the invalid part of the
12839 memory address RTX. */
12841 if (GET_CODE (x) == PLUS
12842 && REG_P (XEXP (x, 1))
12843 && GET_CODE (XEXP (x, 0)) == PLUS
12844 && REG_P (XEXP (XEXP (x, 0), 1)))
12846 rtx base, index;
12847 bool something_reloaded = false;
12849 base = XEXP (XEXP (x, 0), 1);
12850 if (!REG_OK_FOR_BASE_STRICT_P (base))
12852 push_reload (base, NULL_RTX, &XEXP (XEXP (x, 0), 1), NULL,
12853 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
12854 opnum, (enum reload_type) type);
12855 something_reloaded = true;
12858 index = XEXP (x, 1);
12859 if (!REG_OK_FOR_INDEX_STRICT_P (index))
12861 push_reload (index, NULL_RTX, &XEXP (x, 1), NULL,
12862 INDEX_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
12863 opnum, (enum reload_type) type);
12864 something_reloaded = true;
12867 gcc_assert (something_reloaded);
12868 return true;
12871 return false;
12874 /* Determine if OP is a suitable RTX for an address register.
12875 Return the naked register if a register or a register subreg is
12876 found, otherwise return NULL_RTX. */
12878 static rtx
12879 ix86_validate_address_register (rtx op)
12881 enum machine_mode mode = GET_MODE (op);
12883 /* Only SImode or DImode registers can form the address. */
12884 if (mode != SImode && mode != DImode)
12885 return NULL_RTX;
12887 if (REG_P (op))
12888 return op;
12889 else if (GET_CODE (op) == SUBREG)
12891 rtx reg = SUBREG_REG (op);
12893 if (!REG_P (reg))
12894 return NULL_RTX;
12896 mode = GET_MODE (reg);
12898 /* Don't allow SUBREGs that span more than a word. It can
12899 lead to spill failures when the register is one word out
12900 of a two word structure. */
12901 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
12902 return NULL_RTX;
12904 /* Allow only SUBREGs of non-eliminable hard registers. */
12905 if (register_no_elim_operand (reg, mode))
12906 return reg;
12909 /* Op is not a register. */
12910 return NULL_RTX;
12913 /* Recognizes RTL expressions that are valid memory addresses for an
12914 instruction. The MODE argument is the machine mode for the MEM
12915 expression that wants to use this address.
12917 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
12918 convert common non-canonical forms to canonical form so that they will
12919 be recognized. */
12921 static bool
12922 ix86_legitimate_address_p (enum machine_mode, rtx addr, bool strict)
12924 struct ix86_address parts;
12925 rtx base, index, disp;
12926 HOST_WIDE_INT scale;
12927 enum ix86_address_seg seg;
12929 if (ix86_decompose_address (addr, &parts) <= 0)
12930 /* Decomposition failed. */
12931 return false;
12933 base = parts.base;
12934 index = parts.index;
12935 disp = parts.disp;
12936 scale = parts.scale;
12937 seg = parts.seg;
12939 /* Validate base register. */
12940 if (base)
12942 rtx reg = ix86_validate_address_register (base);
12944 if (reg == NULL_RTX)
12945 return false;
12947 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
12948 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
12949 /* Base is not valid. */
12950 return false;
12953 /* Validate index register. */
12954 if (index)
12956 rtx reg = ix86_validate_address_register (index);
12958 if (reg == NULL_RTX)
12959 return false;
12961 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
12962 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
12963 /* Index is not valid. */
12964 return false;
12967 /* Index and base should have the same mode. */
12968 if (base && index
12969 && GET_MODE (base) != GET_MODE (index))
12970 return false;
12972 /* Address override works only on the (%reg) part of %fs:(%reg). */
12973 if (seg != SEG_DEFAULT
12974 && ((base && GET_MODE (base) != word_mode)
12975 || (index && GET_MODE (index) != word_mode)))
12976 return false;
12978 /* Validate scale factor. */
12979 if (scale != 1)
12981 if (!index)
12982 /* Scale without index. */
12983 return false;
12985 if (scale != 2 && scale != 4 && scale != 8)
12986 /* Scale is not a valid multiplier. */
12987 return false;
12990 /* Validate displacement. */
12991 if (disp)
12993 if (GET_CODE (disp) == CONST
12994 && GET_CODE (XEXP (disp, 0)) == UNSPEC
12995 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
12996 switch (XINT (XEXP (disp, 0), 1))
12998 /* Refuse GOTOFF and GOT in 64-bit mode since they are always 64-bit when
12999 used. While the ABI also specifies 32-bit relocations, we don't produce
13000 them at all and use IP-relative addressing instead. */
13001 case UNSPEC_GOT:
13002 case UNSPEC_GOTOFF:
13003 gcc_assert (flag_pic);
13004 if (!TARGET_64BIT)
13005 goto is_legitimate_pic;
13007 /* 64bit address unspec. */
13008 return false;
13010 case UNSPEC_GOTPCREL:
13011 case UNSPEC_PCREL:
13012 gcc_assert (flag_pic);
13013 goto is_legitimate_pic;
13015 case UNSPEC_GOTTPOFF:
13016 case UNSPEC_GOTNTPOFF:
13017 case UNSPEC_INDNTPOFF:
13018 case UNSPEC_NTPOFF:
13019 case UNSPEC_DTPOFF:
13020 break;
13022 case UNSPEC_STACK_CHECK:
13023 gcc_assert (flag_split_stack);
13024 break;
13026 default:
13027 /* Invalid address unspec. */
13028 return false;
13031 else if (SYMBOLIC_CONST (disp)
13032 && (flag_pic
13033 || (TARGET_MACHO
13034 #if TARGET_MACHO
13035 && MACHOPIC_INDIRECT
13036 && !machopic_operand_p (disp)
13037 #endif
13041 is_legitimate_pic:
13042 if (TARGET_64BIT && (index || base))
13044 /* foo@dtpoff(%rX) is ok. */
13045 if (GET_CODE (disp) != CONST
13046 || GET_CODE (XEXP (disp, 0)) != PLUS
13047 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
13048 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
13049 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
13050 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
13051 /* Non-constant pic memory reference. */
13052 return false;
13054 else if ((!TARGET_MACHO || flag_pic)
13055 && ! legitimate_pic_address_disp_p (disp))
13056 /* Displacement is an invalid pic construct. */
13057 return false;
13058 #if TARGET_MACHO
13059 else if (MACHO_DYNAMIC_NO_PIC_P
13060 && !ix86_legitimate_constant_p (Pmode, disp))
13061 /* displacement must be referenced via non_lazy_pointer */
13062 return false;
13063 #endif
13065 /* This code used to verify that a symbolic pic displacement
13066 includes the pic_offset_table_rtx register.
13068 While this is a good idea, unfortunately these constructs may
13069 be created by the "adds using lea" optimization for incorrect
13070 code like:
13072 int a;
13073 int foo (int i)
13075 return *(&a+i);
13078 This code is nonsensical, but results in addressing the
13079 GOT table with a pic_offset_table_rtx base. We can't
13080 just refuse it easily, since it gets matched by the
13081 "addsi3" pattern, which later gets split to lea when the
13082 output register differs from the input. While this
13083 could be handled by a separate addsi pattern for this case
13084 that never results in lea, disabling this test seems to be
13085 the easier and correct fix for the crash. */
13087 else if (GET_CODE (disp) != LABEL_REF
13088 && !CONST_INT_P (disp)
13089 && (GET_CODE (disp) != CONST
13090 || !ix86_legitimate_constant_p (Pmode, disp))
13091 && (GET_CODE (disp) != SYMBOL_REF
13092 || !ix86_legitimate_constant_p (Pmode, disp)))
13093 /* Displacement is not constant. */
13094 return false;
13095 else if (TARGET_64BIT
13096 && !x86_64_immediate_operand (disp, VOIDmode))
13097 /* Displacement is out of range. */
13098 return false;
13099 /* In x32 mode, constant addresses are sign extended to 64bit, so
13100 we have to prevent addresses from 0x80000000 to 0xffffffff. */
13101 else if (TARGET_X32 && !(index || base)
13102 && CONST_INT_P (disp)
13103 && val_signbit_known_set_p (SImode, INTVAL (disp)))
13104 return false;
13107 /* Everything looks valid. */
13108 return true;
13111 /* Determine if a given RTX is a valid constant address. */
13113 bool
13114 constant_address_p (rtx x)
13116 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
13119 /* Return a unique alias set for the GOT. */
13121 static alias_set_type
13122 ix86_GOT_alias_set (void)
13124 static alias_set_type set = -1;
13125 if (set == -1)
13126 set = new_alias_set ();
13127 return set;
13130 /* Return a legitimate reference for ORIG (an address) using the
13131 register REG. If REG is 0, a new pseudo is generated.
13133 There are two types of references that must be handled:
13135 1. Global data references must load the address from the GOT, via
13136 the PIC reg. An insn is emitted to do this load, and the reg is
13137 returned.
13139 2. Static data references, constant pool addresses, and code labels
13140 compute the address as an offset from the GOT, whose base is in
13141 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
13142 differentiate them from global data objects. The returned
13143 address is the PIC reg + an unspec constant.
13145 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
13146 reg also appears in the address. */
13148 static rtx
13149 legitimize_pic_address (rtx orig, rtx reg)
13151 rtx addr = orig;
13152 rtx new_rtx = orig;
13154 #if TARGET_MACHO
13155 if (TARGET_MACHO && !TARGET_64BIT)
13157 if (reg == 0)
13158 reg = gen_reg_rtx (Pmode);
13159 /* Use the generic Mach-O PIC machinery. */
13160 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
13162 #endif
13164 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13166 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13167 if (tmp)
13168 return tmp;
13171 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
13172 new_rtx = addr;
13173 else if (TARGET_64BIT && !TARGET_PECOFF
13174 && ix86_cmodel != CM_SMALL_PIC && gotoff_operand (addr, Pmode))
13176 rtx tmpreg;
13177 /* This symbol may be referenced via a displacement from the PIC
13178 base address (@GOTOFF). */
13180 if (reload_in_progress)
13181 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13182 if (GET_CODE (addr) == CONST)
13183 addr = XEXP (addr, 0);
13184 if (GET_CODE (addr) == PLUS)
13186 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13187 UNSPEC_GOTOFF);
13188 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13190 else
13191 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13192 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13193 if (!reg)
13194 tmpreg = gen_reg_rtx (Pmode);
13195 else
13196 tmpreg = reg;
13197 emit_move_insn (tmpreg, new_rtx);
13199 if (reg != 0)
13201 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
13202 tmpreg, 1, OPTAB_DIRECT);
13203 new_rtx = reg;
13205 else
13206 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
13208 else if (!TARGET_64BIT && !TARGET_PECOFF && gotoff_operand (addr, Pmode))
13210 /* This symbol may be referenced via a displacement from the PIC
13211 base address (@GOTOFF). */
13213 if (reload_in_progress)
13214 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13215 if (GET_CODE (addr) == CONST)
13216 addr = XEXP (addr, 0);
13217 if (GET_CODE (addr) == PLUS)
13219 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13220 UNSPEC_GOTOFF);
13221 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13223 else
13224 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13225 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13226 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13228 if (reg != 0)
13230 emit_move_insn (reg, new_rtx);
13231 new_rtx = reg;
13234 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
13235 /* We can't use @GOTOFF for text labels on VxWorks;
13236 see gotoff_operand. */
13237 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
13239 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13240 if (tmp)
13241 return tmp;
13243 /* For x64 PE-COFF there is no GOT table, so we use the address
13244 directly. */
13245 if (TARGET_64BIT && TARGET_PECOFF)
13247 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
13248 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13250 if (reg == 0)
13251 reg = gen_reg_rtx (Pmode);
13252 emit_move_insn (reg, new_rtx);
13253 new_rtx = reg;
13255 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
13257 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
13258 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13259 new_rtx = gen_const_mem (Pmode, new_rtx);
13260 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13262 if (reg == 0)
13263 reg = gen_reg_rtx (Pmode);
13264 /* Use gen_movsi directly, otherwise the address is loaded
13265 into a register for CSE. We don't want to CSE these addresses;
13266 instead we CSE addresses from the GOT table, so skip this. */
13267 emit_insn (gen_movsi (reg, new_rtx));
13268 new_rtx = reg;
13270 else
13272 /* This symbol must be referenced via a load from the
13273 Global Offset Table (@GOT). */
13275 if (reload_in_progress)
13276 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13277 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
13278 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13279 if (TARGET_64BIT)
13280 new_rtx = force_reg (Pmode, new_rtx);
13281 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13282 new_rtx = gen_const_mem (Pmode, new_rtx);
13283 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13285 if (reg == 0)
13286 reg = gen_reg_rtx (Pmode);
13287 emit_move_insn (reg, new_rtx);
13288 new_rtx = reg;
13291 else
13293 if (CONST_INT_P (addr)
13294 && !x86_64_immediate_operand (addr, VOIDmode))
13296 if (reg)
13298 emit_move_insn (reg, addr);
13299 new_rtx = reg;
13301 else
13302 new_rtx = force_reg (Pmode, addr);
13304 else if (GET_CODE (addr) == CONST)
13306 addr = XEXP (addr, 0);
13308 /* We must match stuff we generate before. Assume the only
13309 unspecs that can get here are ours. Not that we could do
13310 anything with them anyway.... */
13311 if (GET_CODE (addr) == UNSPEC
13312 || (GET_CODE (addr) == PLUS
13313 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
13314 return orig;
13315 gcc_assert (GET_CODE (addr) == PLUS);
13317 if (GET_CODE (addr) == PLUS)
13319 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
13321 /* Check first to see if this is a constant offset from a @GOTOFF
13322 symbol reference. */
13323 if (!TARGET_PECOFF && gotoff_operand (op0, Pmode)
13324 && CONST_INT_P (op1))
13326 if (!TARGET_64BIT)
13328 if (reload_in_progress)
13329 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13330 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
13331 UNSPEC_GOTOFF);
13332 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
13333 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13334 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13336 if (reg != 0)
13338 emit_move_insn (reg, new_rtx);
13339 new_rtx = reg;
13342 else
13344 if (INTVAL (op1) < -16*1024*1024
13345 || INTVAL (op1) >= 16*1024*1024)
13347 if (!x86_64_immediate_operand (op1, Pmode))
13348 op1 = force_reg (Pmode, op1);
13349 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
13353 else
13355 rtx base = legitimize_pic_address (op0, reg);
13356 enum machine_mode mode = GET_MODE (base);
13357 new_rtx
13358 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
13360 if (CONST_INT_P (new_rtx))
13362 if (INTVAL (new_rtx) < -16*1024*1024
13363 || INTVAL (new_rtx) >= 16*1024*1024)
13365 if (!x86_64_immediate_operand (new_rtx, mode))
13366 new_rtx = force_reg (mode, new_rtx);
13367 new_rtx
13368 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
13370 else
13371 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
13373 else
13375 if (GET_CODE (new_rtx) == PLUS
13376 && CONSTANT_P (XEXP (new_rtx, 1)))
13378 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
13379 new_rtx = XEXP (new_rtx, 1);
13381 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
13386 return new_rtx;
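
/* Illustrative sketch (not part of i386.c): the two reference classes the
   comment before legitimize_pic_address describes, reduced to the 32-bit ELF
   instruction forms each one ends up using.  The function name and the
   returned strings are descriptive only; the code above builds
   UNSPEC_GOTOFF / UNSPEC_GOT RTL rather than text.  */

#include <stdbool.h>

static const char *
pic_reference_form_sketch (bool symbol_is_local)
{
  if (symbol_is_local)
    /* Static data and labels: a constant offset from the GOT base that is
       already in the PIC register.  */
    return "leal sym@GOTOFF(%ebx), %reg";

  /* Global data: load the symbol's address from its GOT slot.  */
  return "movl sym@GOT(%ebx), %reg";
}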
13389 /* Load the thread pointer. If TO_REG is true, force it into a register. */
13391 static rtx
13392 get_thread_pointer (enum machine_mode tp_mode, bool to_reg)
13394 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
13396 if (GET_MODE (tp) != tp_mode)
13398 gcc_assert (GET_MODE (tp) == SImode);
13399 gcc_assert (tp_mode == DImode);
13401 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
13404 if (to_reg)
13405 tp = copy_to_mode_reg (tp_mode, tp);
13407 return tp;
13410 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13412 static GTY(()) rtx ix86_tls_symbol;
13414 static rtx
13415 ix86_tls_get_addr (void)
13417 if (!ix86_tls_symbol)
13419 const char *sym
13420 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
13421 ? "___tls_get_addr" : "__tls_get_addr");
13423 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
13426 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
13428 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
13429 UNSPEC_PLTOFF);
13430 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
13431 gen_rtx_CONST (Pmode, unspec));
13434 return ix86_tls_symbol;
13437 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13439 static GTY(()) rtx ix86_tls_module_base_symbol;
13442 ix86_tls_module_base (void)
13444 if (!ix86_tls_module_base_symbol)
13446 ix86_tls_module_base_symbol
13447 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
13449 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13450 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13453 return ix86_tls_module_base_symbol;
13456 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
13457 false if we expect this to be used for a memory address and true if
13458 we expect to load the address into a register. */
13460 static rtx
13461 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
13463 rtx dest, base, off;
13464 rtx pic = NULL_RTX, tp = NULL_RTX;
13465 enum machine_mode tp_mode = Pmode;
13466 int type;
13468 /* Fall back to the global dynamic model if the toolchain cannot support local
13469 dynamic. */
13470 if (TARGET_SUN_TLS && !TARGET_64BIT
13471 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
13472 && model == TLS_MODEL_LOCAL_DYNAMIC)
13473 model = TLS_MODEL_GLOBAL_DYNAMIC;
13475 switch (model)
13477 case TLS_MODEL_GLOBAL_DYNAMIC:
13478 dest = gen_reg_rtx (Pmode);
13480 if (!TARGET_64BIT)
13482 if (flag_pic && !TARGET_PECOFF)
13483 pic = pic_offset_table_rtx;
13484 else
13486 pic = gen_reg_rtx (Pmode);
13487 emit_insn (gen_set_got (pic));
13491 if (TARGET_GNU2_TLS)
13493 if (TARGET_64BIT)
13494 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
13495 else
13496 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
13498 tp = get_thread_pointer (Pmode, true);
13499 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
13501 if (GET_MODE (x) != Pmode)
13502 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13504 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
13506 else
13508 rtx caddr = ix86_tls_get_addr ();
13510 if (TARGET_64BIT)
13512 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13513 rtx_insn *insns;
13515 start_sequence ();
13516 emit_call_insn
13517 (ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
13518 insns = get_insns ();
13519 end_sequence ();
13521 if (GET_MODE (x) != Pmode)
13522 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13524 RTL_CONST_CALL_P (insns) = 1;
13525 emit_libcall_block (insns, dest, rax, x);
13527 else
13528 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
13530 break;
13532 case TLS_MODEL_LOCAL_DYNAMIC:
13533 base = gen_reg_rtx (Pmode);
13535 if (!TARGET_64BIT)
13537 if (flag_pic)
13538 pic = pic_offset_table_rtx;
13539 else
13541 pic = gen_reg_rtx (Pmode);
13542 emit_insn (gen_set_got (pic));
13546 if (TARGET_GNU2_TLS)
13548 rtx tmp = ix86_tls_module_base ();
13550 if (TARGET_64BIT)
13551 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
13552 else
13553 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
13555 tp = get_thread_pointer (Pmode, true);
13556 set_unique_reg_note (get_last_insn (), REG_EQUAL,
13557 gen_rtx_MINUS (Pmode, tmp, tp));
13559 else
13561 rtx caddr = ix86_tls_get_addr ();
13563 if (TARGET_64BIT)
13565 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13566 rtx_insn *insns;
13567 rtx eqv;
13569 start_sequence ();
13570 emit_call_insn
13571 (ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
13572 insns = get_insns ();
13573 end_sequence ();
13575 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
13576 share the LD_BASE result with other LD model accesses. */
13577 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
13578 UNSPEC_TLS_LD_BASE);
13580 RTL_CONST_CALL_P (insns) = 1;
13581 emit_libcall_block (insns, base, rax, eqv);
13583 else
13584 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
13587 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
13588 off = gen_rtx_CONST (Pmode, off);
13590 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
13592 if (TARGET_GNU2_TLS)
13594 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
13596 if (GET_MODE (x) != Pmode)
13597 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13599 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
13601 break;
13603 case TLS_MODEL_INITIAL_EXEC:
13604 if (TARGET_64BIT)
13606 if (TARGET_SUN_TLS && !TARGET_X32)
13608 /* The Sun linker took the AMD64 TLS spec literally
13609 and can only handle %rax as destination of the
13610 initial executable code sequence. */
13612 dest = gen_reg_rtx (DImode);
13613 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
13614 return dest;
13617 /* Generate DImode references to avoid %fs:(%reg32)
13618 problems and the linker IE->LE relaxation bug. */
13619 tp_mode = DImode;
13620 pic = NULL;
13621 type = UNSPEC_GOTNTPOFF;
13623 else if (flag_pic)
13625 if (reload_in_progress)
13626 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13627 pic = pic_offset_table_rtx;
13628 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
13630 else if (!TARGET_ANY_GNU_TLS)
13632 pic = gen_reg_rtx (Pmode);
13633 emit_insn (gen_set_got (pic));
13634 type = UNSPEC_GOTTPOFF;
13636 else
13638 pic = NULL;
13639 type = UNSPEC_INDNTPOFF;
13642 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
13643 off = gen_rtx_CONST (tp_mode, off);
13644 if (pic)
13645 off = gen_rtx_PLUS (tp_mode, pic, off);
13646 off = gen_const_mem (tp_mode, off);
13647 set_mem_alias_set (off, ix86_GOT_alias_set ());
13649 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13651 base = get_thread_pointer (tp_mode,
13652 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
13653 off = force_reg (tp_mode, off);
13654 return gen_rtx_PLUS (tp_mode, base, off);
13656 else
13658 base = get_thread_pointer (Pmode, true);
13659 dest = gen_reg_rtx (Pmode);
13660 emit_insn (ix86_gen_sub3 (dest, base, off));
13662 break;
13664 case TLS_MODEL_LOCAL_EXEC:
13665 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
13666 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13667 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
13668 off = gen_rtx_CONST (Pmode, off);
13670 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13672 base = get_thread_pointer (Pmode,
13673 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
13674 return gen_rtx_PLUS (Pmode, base, off);
13676 else
13678 base = get_thread_pointer (Pmode, true);
13679 dest = gen_reg_rtx (Pmode);
13680 emit_insn (ix86_gen_sub3 (dest, base, off));
13682 break;
13684 default:
13685 gcc_unreachable ();
13688 return dest;
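
/* Illustrative sketch (not part of i386.c): a standalone summary of the
   classic (non-TLSDESC) access sequences that the four TLS models handled
   above boil down to on x86.  The enum and function names are hypothetical
   and the strings are descriptive only; the relocation names are the
   standard x86 TLS ABI ones used by these sequences.  */

enum tls_model_sketch
{
  SKETCH_GLOBAL_DYNAMIC,   /* One __tls_get_addr call per symbol.  */
  SKETCH_LOCAL_DYNAMIC,    /* One call for the module base, then @dtpoff.  */
  SKETCH_INITIAL_EXEC,     /* Offset loaded from the GOT, added to the TP.  */
  SKETCH_LOCAL_EXEC        /* Link-time constant offset from the TP.  */
};

static const char *
tls_access_sketch (enum tls_model_sketch model)
{
  switch (model)
    {
    case SKETCH_GLOBAL_DYNAMIC:
      return "call __tls_get_addr (sym@tlsgd)";
    case SKETCH_LOCAL_DYNAMIC:
      return "call __tls_get_addr (@tlsldm), then add sym@dtpoff";
    case SKETCH_INITIAL_EXEC:
      return "thread pointer + GOT[sym@gottpoff]";
    case SKETCH_LOCAL_EXEC:
      return "thread pointer + sym@tpoff";
    }
  return "unknown";
}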
13691 /* Create or return the unique __imp_DECL dllimport symbol corresponding
13692 to symbol DECL if BEIMPORT is true. Otherwise create or return the
13693 unique refptr-DECL symbol corresponding to symbol DECL. */
13695 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
13696 htab_t dllimport_map;
13698 static tree
13699 get_dllimport_decl (tree decl, bool beimport)
13701 struct tree_map *h, in;
13702 void **loc;
13703 const char *name;
13704 const char *prefix;
13705 size_t namelen, prefixlen;
13706 char *imp_name;
13707 tree to;
13708 rtx rtl;
13710 if (!dllimport_map)
13711 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
13713 in.hash = htab_hash_pointer (decl);
13714 in.base.from = decl;
13715 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
13716 h = (struct tree_map *) *loc;
13717 if (h)
13718 return h->to;
13720 *loc = h = ggc_alloc<tree_map> ();
13721 h->hash = in.hash;
13722 h->base.from = decl;
13723 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
13724 VAR_DECL, NULL, ptr_type_node);
13725 DECL_ARTIFICIAL (to) = 1;
13726 DECL_IGNORED_P (to) = 1;
13727 DECL_EXTERNAL (to) = 1;
13728 TREE_READONLY (to) = 1;
13730 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
13731 name = targetm.strip_name_encoding (name);
13732 if (beimport)
13733 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
13734 ? "*__imp_" : "*__imp__";
13735 else
13736 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
13737 namelen = strlen (name);
13738 prefixlen = strlen (prefix);
13739 imp_name = (char *) alloca (namelen + prefixlen + 1);
13740 memcpy (imp_name, prefix, prefixlen);
13741 memcpy (imp_name + prefixlen, name, namelen + 1);
13743 name = ggc_alloc_string (imp_name, namelen + prefixlen);
13744 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
13745 SET_SYMBOL_REF_DECL (rtl, to);
13746 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
13747 if (!beimport)
13749 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
13750 #ifdef SUB_TARGET_RECORD_STUB
13751 SUB_TARGET_RECORD_STUB (name);
13752 #endif
13755 rtl = gen_const_mem (Pmode, rtl);
13756 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
13758 SET_DECL_RTL (to, rtl);
13759 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
13761 return to;
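
/* Illustrative sketch (not part of i386.c): the name mangling performed by
   get_dllimport_decl above, as a small standalone helper.  The function name
   and buffer handling are hypothetical; the prefixes and the conditions
   choosing between them mirror the code above.  */

#include <stdbool.h>
#include <stdio.h>

static void
dllimport_name_sketch (const char *name, bool beimport, bool fastcall_name,
                       bool empty_user_label_prefix, char *out, size_t outlen)
{
  const char *prefix;

  if (beimport)
    /* "*__imp_" for fastcall names or targets with no user label prefix,
       "*__imp__" otherwise.  */
    prefix = (fastcall_name || empty_user_label_prefix)
             ? "*__imp_" : "*__imp__";
  else
    /* refptr stubs for extern decls in the medium/large code models.  */
    prefix = empty_user_label_prefix ? "*.refptr." : "*refptr.";

  snprintf (out, outlen, "%s%s", prefix, name);
}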
13764 /* Expand SYMBOL into its corresponding far-address symbol.
13765 WANT_REG is true if we require the result be a register. */
13767 static rtx
13768 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
13770 tree imp_decl;
13771 rtx x;
13773 gcc_assert (SYMBOL_REF_DECL (symbol));
13774 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
13776 x = DECL_RTL (imp_decl);
13777 if (want_reg)
13778 x = force_reg (Pmode, x);
13779 return x;
13782 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
13783 true if we require the result be a register. */
13785 static rtx
13786 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
13788 tree imp_decl;
13789 rtx x;
13791 gcc_assert (SYMBOL_REF_DECL (symbol));
13792 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
13794 x = DECL_RTL (imp_decl);
13795 if (want_reg)
13796 x = force_reg (Pmode, x);
13797 return x;
13800 /* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG
13801 is true if we require the result be a register. */
13803 static rtx
13804 legitimize_pe_coff_symbol (rtx addr, bool inreg)
13806 if (!TARGET_PECOFF)
13807 return NULL_RTX;
13809 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13811 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
13812 return legitimize_dllimport_symbol (addr, inreg);
13813 if (GET_CODE (addr) == CONST
13814 && GET_CODE (XEXP (addr, 0)) == PLUS
13815 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
13816 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
13818 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
13819 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
13823 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
13824 return NULL_RTX;
13825 if (GET_CODE (addr) == SYMBOL_REF
13826 && !is_imported_p (addr)
13827 && SYMBOL_REF_EXTERNAL_P (addr)
13828 && SYMBOL_REF_DECL (addr))
13829 return legitimize_pe_coff_extern_decl (addr, inreg);
13831 if (GET_CODE (addr) == CONST
13832 && GET_CODE (XEXP (addr, 0)) == PLUS
13833 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
13834 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
13835 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
13836 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
13838 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
13839 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
13841 return NULL_RTX;
13844 /* Try machine-dependent ways of modifying an illegitimate address
13845 to be legitimate. If we find one, return the new, valid address.
13846 This macro is used in only one place: `memory_address' in explow.c.
13848 OLDX is the address as it was before break_out_memory_refs was called.
13849 In some cases it is useful to look at this to decide what needs to be done.
13851 It is always safe for this macro to do nothing. It exists to recognize
13852 opportunities to optimize the output.
13854 For the 80386, we handle X+REG by loading X into a register R and
13855 using R+REG. R will go in a general reg and indexing will be used.
13856 However, if REG is a broken-out memory address or multiplication,
13857 nothing needs to be done because REG can certainly go in a general reg.
13859 When -fpic is used, special handling is needed for symbolic references.
13860 See comments by legitimize_pic_address in i386.c for details. */
13862 static rtx
13863 ix86_legitimize_address (rtx x, rtx, enum machine_mode mode)
13865 int changed = 0;
13866 unsigned log;
13868 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
13869 if (log)
13870 return legitimize_tls_address (x, (enum tls_model) log, false);
13871 if (GET_CODE (x) == CONST
13872 && GET_CODE (XEXP (x, 0)) == PLUS
13873 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
13874 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
13876 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
13877 (enum tls_model) log, false);
13878 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
13881 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13883 rtx tmp = legitimize_pe_coff_symbol (x, true);
13884 if (tmp)
13885 return tmp;
13888 if (flag_pic && SYMBOLIC_CONST (x))
13889 return legitimize_pic_address (x, 0);
13891 #if TARGET_MACHO
13892 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
13893 return machopic_indirect_data_reference (x, 0);
13894 #endif
13896 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
13897 if (GET_CODE (x) == ASHIFT
13898 && CONST_INT_P (XEXP (x, 1))
13899 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
13901 changed = 1;
13902 log = INTVAL (XEXP (x, 1));
13903 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
13904 GEN_INT (1 << log));
13907 if (GET_CODE (x) == PLUS)
13909 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
13911 if (GET_CODE (XEXP (x, 0)) == ASHIFT
13912 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
13913 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
13915 changed = 1;
13916 log = INTVAL (XEXP (XEXP (x, 0), 1));
13917 XEXP (x, 0) = gen_rtx_MULT (Pmode,
13918 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
13919 GEN_INT (1 << log));
13922 if (GET_CODE (XEXP (x, 1)) == ASHIFT
13923 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
13924 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
13926 changed = 1;
13927 log = INTVAL (XEXP (XEXP (x, 1), 1));
13928 XEXP (x, 1) = gen_rtx_MULT (Pmode,
13929 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
13930 GEN_INT (1 << log));
13933 /* Put multiply first if it isn't already. */
13934 if (GET_CODE (XEXP (x, 1)) == MULT)
13936 rtx tmp = XEXP (x, 0);
13937 XEXP (x, 0) = XEXP (x, 1);
13938 XEXP (x, 1) = tmp;
13939 changed = 1;
13942 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
13943 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
13944 created by virtual register instantiation, register elimination, and
13945 similar optimizations. */
13946 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
13948 changed = 1;
13949 x = gen_rtx_PLUS (Pmode,
13950 gen_rtx_PLUS (Pmode, XEXP (x, 0),
13951 XEXP (XEXP (x, 1), 0)),
13952 XEXP (XEXP (x, 1), 1));
13955 /* Canonicalize
13956 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
13957 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
13958 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
13959 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
13960 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
13961 && CONSTANT_P (XEXP (x, 1)))
13963 rtx constant;
13964 rtx other = NULL_RTX;
13966 if (CONST_INT_P (XEXP (x, 1)))
13968 constant = XEXP (x, 1);
13969 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
13971 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
13973 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
13974 other = XEXP (x, 1);
13976 else
13977 constant = 0;
13979 if (constant)
13981 changed = 1;
13982 x = gen_rtx_PLUS (Pmode,
13983 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
13984 XEXP (XEXP (XEXP (x, 0), 1), 0)),
13985 plus_constant (Pmode, other,
13986 INTVAL (constant)));
13990 if (changed && ix86_legitimate_address_p (mode, x, false))
13991 return x;
13993 if (GET_CODE (XEXP (x, 0)) == MULT)
13995 changed = 1;
13996 XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
13999 if (GET_CODE (XEXP (x, 1)) == MULT)
14001 changed = 1;
14002 XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
14005 if (changed
14006 && REG_P (XEXP (x, 1))
14007 && REG_P (XEXP (x, 0)))
14008 return x;
14010 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
14012 changed = 1;
14013 x = legitimize_pic_address (x, 0);
14016 if (changed && ix86_legitimate_address_p (mode, x, false))
14017 return x;
14019 if (REG_P (XEXP (x, 0)))
14021 rtx temp = gen_reg_rtx (Pmode);
14022 rtx val = force_operand (XEXP (x, 1), temp);
14023 if (val != temp)
14025 val = convert_to_mode (Pmode, val, 1);
14026 emit_move_insn (temp, val);
14029 XEXP (x, 1) = temp;
14030 return x;
14033 else if (REG_P (XEXP (x, 1)))
14035 rtx temp = gen_reg_rtx (Pmode);
14036 rtx val = force_operand (XEXP (x, 0), temp);
14037 if (val != temp)
14039 val = convert_to_mode (Pmode, val, 1);
14040 emit_move_insn (temp, val);
14043 XEXP (x, 0) = temp;
14044 return x;
14048 return x;
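
/* Illustrative sketch (not part of i386.c): the shift-to-multiply
   canonicalization performed above as plain integer arithmetic.  A shift
   count of 0..3 becomes a multiplier of 1, 2, 4 or 8, exactly the scales an
   x86 effective address can encode.  The function name is hypothetical.  */

static int
shift_count_to_scale_sketch (int count)
{
  /* Mirrors the "(unsigned HOST_WIDE_INT) INTVAL (...) < 4" guards above.  */
  if (count < 0 || count > 3)
    return -1;               /* Not representable as an address scale.  */
  return 1 << count;         /* 1, 2, 4 or 8.  */
}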
14051 /* Print an integer constant expression in assembler syntax. Addition
14052 and subtraction are the only arithmetic that may appear in these
14053 expressions. FILE is the stdio stream to write to, X is the rtx, and
14054 CODE is the operand print code from the output string. */
14056 static void
14057 output_pic_addr_const (FILE *file, rtx x, int code)
14059 char buf[256];
14061 switch (GET_CODE (x))
14063 case PC:
14064 gcc_assert (flag_pic);
14065 putc ('.', file);
14066 break;
14068 case SYMBOL_REF:
14069 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
14070 output_addr_const (file, x);
14071 else
14073 const char *name = XSTR (x, 0);
14075 /* Mark the decl as referenced so that cgraph will
14076 output the function. */
14077 if (SYMBOL_REF_DECL (x))
14078 mark_decl_referenced (SYMBOL_REF_DECL (x));
14080 #if TARGET_MACHO
14081 if (MACHOPIC_INDIRECT
14082 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
14083 name = machopic_indirection_name (x, /*stub_p=*/true);
14084 #endif
14085 assemble_name (file, name);
14087 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
14088 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
14089 fputs ("@PLT", file);
14090 break;
14092 case LABEL_REF:
14093 x = XEXP (x, 0);
14094 /* FALLTHRU */
14095 case CODE_LABEL:
14096 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
14097 assemble_name (asm_out_file, buf);
14098 break;
14100 case CONST_INT:
14101 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14102 break;
14104 case CONST:
14105 /* This used to output parentheses around the expression,
14106 but that does not work on the 386 (either ATT or BSD assembler). */
14107 output_pic_addr_const (file, XEXP (x, 0), code);
14108 break;
14110 case CONST_DOUBLE:
14111 if (GET_MODE (x) == VOIDmode)
14113 /* We can use %d if the number is <32 bits and positive. */
14114 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
14115 fprintf (file, "0x%lx%08lx",
14116 (unsigned long) CONST_DOUBLE_HIGH (x),
14117 (unsigned long) CONST_DOUBLE_LOW (x));
14118 else
14119 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
14121 else
14122 /* We can't handle floating point constants;
14123 TARGET_PRINT_OPERAND must handle them. */
14124 output_operand_lossage ("floating constant misused");
14125 break;
14127 case PLUS:
14128 /* Some assemblers need integer constants to appear first. */
14129 if (CONST_INT_P (XEXP (x, 0)))
14131 output_pic_addr_const (file, XEXP (x, 0), code);
14132 putc ('+', file);
14133 output_pic_addr_const (file, XEXP (x, 1), code);
14135 else
14137 gcc_assert (CONST_INT_P (XEXP (x, 1)));
14138 output_pic_addr_const (file, XEXP (x, 1), code);
14139 putc ('+', file);
14140 output_pic_addr_const (file, XEXP (x, 0), code);
14142 break;
14144 case MINUS:
14145 if (!TARGET_MACHO)
14146 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
14147 output_pic_addr_const (file, XEXP (x, 0), code);
14148 putc ('-', file);
14149 output_pic_addr_const (file, XEXP (x, 1), code);
14150 if (!TARGET_MACHO)
14151 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
14152 break;
14154 case UNSPEC:
14155 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
14157 bool f = i386_asm_output_addr_const_extra (file, x);
14158 gcc_assert (f);
14159 break;
14162 gcc_assert (XVECLEN (x, 0) == 1);
14163 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
14164 switch (XINT (x, 1))
14166 case UNSPEC_GOT:
14167 fputs ("@GOT", file);
14168 break;
14169 case UNSPEC_GOTOFF:
14170 fputs ("@GOTOFF", file);
14171 break;
14172 case UNSPEC_PLTOFF:
14173 fputs ("@PLTOFF", file);
14174 break;
14175 case UNSPEC_PCREL:
14176 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14177 "(%rip)" : "[rip]", file);
14178 break;
14179 case UNSPEC_GOTPCREL:
14180 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14181 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
14182 break;
14183 case UNSPEC_GOTTPOFF:
14184 /* FIXME: This might be @TPOFF in Sun ld too. */
14185 fputs ("@gottpoff", file);
14186 break;
14187 case UNSPEC_TPOFF:
14188 fputs ("@tpoff", file);
14189 break;
14190 case UNSPEC_NTPOFF:
14191 if (TARGET_64BIT)
14192 fputs ("@tpoff", file);
14193 else
14194 fputs ("@ntpoff", file);
14195 break;
14196 case UNSPEC_DTPOFF:
14197 fputs ("@dtpoff", file);
14198 break;
14199 case UNSPEC_GOTNTPOFF:
14200 if (TARGET_64BIT)
14201 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14202 "@gottpoff(%rip)": "@gottpoff[rip]", file);
14203 else
14204 fputs ("@gotntpoff", file);
14205 break;
14206 case UNSPEC_INDNTPOFF:
14207 fputs ("@indntpoff", file);
14208 break;
14209 #if TARGET_MACHO
14210 case UNSPEC_MACHOPIC_OFFSET:
14211 putc ('-', file);
14212 machopic_output_function_base_name (file);
14213 break;
14214 #endif
14215 default:
14216 output_operand_lossage ("invalid UNSPEC as operand");
14217 break;
14219 break;
14221 default:
14222 output_operand_lossage ("invalid expression as operand");
14226 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
14227 We need to emit DTP-relative relocations. */
14229 static void ATTRIBUTE_UNUSED
14230 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
14232 fputs (ASM_LONG, file);
14233 output_addr_const (file, x);
14234 fputs ("@dtpoff", file);
14235 switch (size)
14237 case 4:
14238 break;
14239 case 8:
14240 fputs (", 0", file);
14241 break;
14242 default:
14243 gcc_unreachable ();
14247 /* Return true if X is a representation of the PIC register. This copes
14248 with calls from ix86_find_base_term, where the register might have
14249 been replaced by a cselib value. */
14251 static bool
14252 ix86_pic_register_p (rtx x)
14254 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
14255 return (pic_offset_table_rtx
14256 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
14257 else
14258 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
14261 /* Helper function for ix86_delegitimize_address.
14262 Attempt to delegitimize TLS local-exec accesses. */
14264 static rtx
14265 ix86_delegitimize_tls_address (rtx orig_x)
14267 rtx x = orig_x, unspec;
14268 struct ix86_address addr;
14270 if (!TARGET_TLS_DIRECT_SEG_REFS)
14271 return orig_x;
14272 if (MEM_P (x))
14273 x = XEXP (x, 0);
14274 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
14275 return orig_x;
14276 if (ix86_decompose_address (x, &addr) == 0
14277 || addr.seg != DEFAULT_TLS_SEG_REG
14278 || addr.disp == NULL_RTX
14279 || GET_CODE (addr.disp) != CONST)
14280 return orig_x;
14281 unspec = XEXP (addr.disp, 0);
14282 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
14283 unspec = XEXP (unspec, 0);
14284 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
14285 return orig_x;
14286 x = XVECEXP (unspec, 0, 0);
14287 gcc_assert (GET_CODE (x) == SYMBOL_REF);
14288 if (unspec != XEXP (addr.disp, 0))
14289 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
14290 if (addr.index)
14292 rtx idx = addr.index;
14293 if (addr.scale != 1)
14294 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
14295 x = gen_rtx_PLUS (Pmode, idx, x);
14297 if (addr.base)
14298 x = gen_rtx_PLUS (Pmode, addr.base, x);
14299 if (MEM_P (orig_x))
14300 x = replace_equiv_address_nv (orig_x, x);
14301 return x;
14304 /* In the name of slightly smaller debug output, and to cater to
14305 general assembler lossage, recognize PIC+GOTOFF and turn it back
14306 into a direct symbol reference.
14308 On Darwin, this is necessary to avoid a crash, because Darwin
14309 has a different PIC label for each routine but the DWARF debugging
14310 information is not associated with any particular routine, so it's
14311 necessary to remove references to the PIC label from RTL stored by
14312 the DWARF output code. */
14314 static rtx
14315 ix86_delegitimize_address (rtx x)
14317 rtx orig_x = delegitimize_mem_from_attrs (x);
14318 /* addend is NULL or some rtx if x is something+GOTOFF where
14319 something doesn't include the PIC register. */
14320 rtx addend = NULL_RTX;
14321 /* reg_addend is NULL or a multiple of some register. */
14322 rtx reg_addend = NULL_RTX;
14323 /* const_addend is NULL or a const_int. */
14324 rtx const_addend = NULL_RTX;
14325 /* This is the result, or NULL. */
14326 rtx result = NULL_RTX;
14328 x = orig_x;
14330 if (MEM_P (x))
14331 x = XEXP (x, 0);
14333 if (TARGET_64BIT)
14335 if (GET_CODE (x) == CONST
14336 && GET_CODE (XEXP (x, 0)) == PLUS
14337 && GET_MODE (XEXP (x, 0)) == Pmode
14338 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14339 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
14340 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
14342 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
14343 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
14344 if (MEM_P (orig_x))
14345 x = replace_equiv_address_nv (orig_x, x);
14346 return x;
14349 if (GET_CODE (x) == CONST
14350 && GET_CODE (XEXP (x, 0)) == UNSPEC
14351 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
14352 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
14353 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
14355 x = XVECEXP (XEXP (x, 0), 0, 0);
14356 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
14358 x = simplify_gen_subreg (GET_MODE (orig_x), x,
14359 GET_MODE (x), 0);
14360 if (x == NULL_RTX)
14361 return orig_x;
14363 return x;
14366 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
14367 return ix86_delegitimize_tls_address (orig_x);
14369 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
14370 and -mcmodel=medium -fpic. */
14373 if (GET_CODE (x) != PLUS
14374 || GET_CODE (XEXP (x, 1)) != CONST)
14375 return ix86_delegitimize_tls_address (orig_x);
14377 if (ix86_pic_register_p (XEXP (x, 0)))
14378 /* %ebx + GOT/GOTOFF */
14380 else if (GET_CODE (XEXP (x, 0)) == PLUS)
14382 /* %ebx + %reg * scale + GOT/GOTOFF */
14383 reg_addend = XEXP (x, 0);
14384 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
14385 reg_addend = XEXP (reg_addend, 1);
14386 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
14387 reg_addend = XEXP (reg_addend, 0);
14388 else
14390 reg_addend = NULL_RTX;
14391 addend = XEXP (x, 0);
14394 else
14395 addend = XEXP (x, 0);
14397 x = XEXP (XEXP (x, 1), 0);
14398 if (GET_CODE (x) == PLUS
14399 && CONST_INT_P (XEXP (x, 1)))
14401 const_addend = XEXP (x, 1);
14402 x = XEXP (x, 0);
14405 if (GET_CODE (x) == UNSPEC
14406 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
14407 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
14408 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
14409 && !MEM_P (orig_x) && !addend)))
14410 result = XVECEXP (x, 0, 0);
14412 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
14413 && !MEM_P (orig_x))
14414 result = XVECEXP (x, 0, 0);
14416 if (! result)
14417 return ix86_delegitimize_tls_address (orig_x);
14419 if (const_addend)
14420 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
14421 if (reg_addend)
14422 result = gen_rtx_PLUS (Pmode, reg_addend, result);
14423 if (addend)
14425 /* If the rest of original X doesn't involve the PIC register, add
14426 addend and subtract pic_offset_table_rtx. This can happen e.g.
14427 for code like:
14428 leal (%ebx, %ecx, 4), %ecx
14430 movl foo@GOTOFF(%ecx), %edx
14431 in which case we return (%ecx - %ebx) + foo. */
14432 if (pic_offset_table_rtx)
14433 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
14434 pic_offset_table_rtx),
14435 result);
14436 else
14437 return orig_x;
14439 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
14441 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
14442 if (result == NULL_RTX)
14443 return orig_x;
14445 return result;
14448 /* If X is a machine specific address (i.e. a symbol or label being
14449 referenced as a displacement from the GOT implemented using an
14450 UNSPEC), then return the base term. Otherwise return X. */
14453 ix86_find_base_term (rtx x)
14455 rtx term;
14457 if (TARGET_64BIT)
14459 if (GET_CODE (x) != CONST)
14460 return x;
14461 term = XEXP (x, 0);
14462 if (GET_CODE (term) == PLUS
14463 && (CONST_INT_P (XEXP (term, 1))
14464 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
14465 term = XEXP (term, 0);
14466 if (GET_CODE (term) != UNSPEC
14467 || (XINT (term, 1) != UNSPEC_GOTPCREL
14468 && XINT (term, 1) != UNSPEC_PCREL))
14469 return x;
14471 return XVECEXP (term, 0, 0);
14474 return ix86_delegitimize_address (x);
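/* Print to FILE the one- or two-letter condition suffix (e.g. "e", "ne",
   "g", "b") corresponding to comparison CODE in flags mode MODE.  If
   REVERSE is true the condition is reversed first; FP selects the
   alternate spellings used for fcmov on some assemblers.  */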
14477 static void
14478 put_condition_code (enum rtx_code code, enum machine_mode mode, bool reverse,
14479 bool fp, FILE *file)
14481 const char *suffix;
14483 if (mode == CCFPmode || mode == CCFPUmode)
14485 code = ix86_fp_compare_code_to_integer (code);
14486 mode = CCmode;
14488 if (reverse)
14489 code = reverse_condition (code);
14491 switch (code)
14493 case EQ:
14494 switch (mode)
14496 case CCAmode:
14497 suffix = "a";
14498 break;
14500 case CCCmode:
14501 suffix = "c";
14502 break;
14504 case CCOmode:
14505 suffix = "o";
14506 break;
14508 case CCSmode:
14509 suffix = "s";
14510 break;
14512 default:
14513 suffix = "e";
14515 break;
14516 case NE:
14517 switch (mode)
14519 case CCAmode:
14520 suffix = "na";
14521 break;
14523 case CCCmode:
14524 suffix = "nc";
14525 break;
14527 case CCOmode:
14528 suffix = "no";
14529 break;
14531 case CCSmode:
14532 suffix = "ns";
14533 break;
14535 default:
14536 suffix = "ne";
14538 break;
14539 case GT:
14540 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
14541 suffix = "g";
14542 break;
14543 case GTU:
14544 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
14545 Those same assemblers have the same but opposite lossage on cmov. */
14546 if (mode == CCmode)
14547 suffix = fp ? "nbe" : "a";
14548 else
14549 gcc_unreachable ();
14550 break;
14551 case LT:
14552 switch (mode)
14554 case CCNOmode:
14555 case CCGOCmode:
14556 suffix = "s";
14557 break;
14559 case CCmode:
14560 case CCGCmode:
14561 suffix = "l";
14562 break;
14564 default:
14565 gcc_unreachable ();
14567 break;
14568 case LTU:
14569 if (mode == CCmode)
14570 suffix = "b";
14571 else if (mode == CCCmode)
14572 suffix = "c";
14573 else
14574 gcc_unreachable ();
14575 break;
14576 case GE:
14577 switch (mode)
14579 case CCNOmode:
14580 case CCGOCmode:
14581 suffix = "ns";
14582 break;
14584 case CCmode:
14585 case CCGCmode:
14586 suffix = "ge";
14587 break;
14589 default:
14590 gcc_unreachable ();
14592 break;
14593 case GEU:
14594 if (mode == CCmode)
14595 suffix = fp ? "nb" : "ae";
14596 else if (mode == CCCmode)
14597 suffix = "nc";
14598 else
14599 gcc_unreachable ();
14600 break;
14601 case LE:
14602 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
14603 suffix = "le";
14604 break;
14605 case LEU:
14606 if (mode == CCmode)
14607 suffix = "be";
14608 else
14609 gcc_unreachable ();
14610 break;
14611 case UNORDERED:
14612 suffix = fp ? "u" : "p";
14613 break;
14614 case ORDERED:
14615 suffix = fp ? "nu" : "np";
14616 break;
14617 default:
14618 gcc_unreachable ();
14620 fputs (suffix, file);
14623 /* Print the name of register X to FILE based on its machine mode and number.
14624 If CODE is 'w', pretend the mode is HImode.
14625 If CODE is 'b', pretend the mode is QImode.
14626 If CODE is 'k', pretend the mode is SImode.
14627 If CODE is 'q', pretend the mode is DImode.
14628 If CODE is 'x', pretend the mode is V4SFmode.
14629 If CODE is 't', pretend the mode is V8SFmode.
14630 If CODE is 'g', pretend the mode is V16SFmode.
14631 If CODE is 'h', pretend the reg is the 'high' byte register.
14632 If CODE is 'y', print "st(0)" instead of "st" if the reg is a stack op.
14633 If CODE is 'd', duplicate the operand for an AVX instruction.
14636 void
14637 print_reg (rtx x, int code, FILE *file)
14639 const char *reg;
14640 unsigned int regno;
14641 bool duplicated = code == 'd' && TARGET_AVX;
14643 if (ASSEMBLER_DIALECT == ASM_ATT)
14644 putc ('%', file);
14646 if (x == pc_rtx)
14648 gcc_assert (TARGET_64BIT);
14649 fputs ("rip", file);
14650 return;
14653 regno = true_regnum (x);
14654 gcc_assert (regno != ARG_POINTER_REGNUM
14655 && regno != FRAME_POINTER_REGNUM
14656 && regno != FLAGS_REG
14657 && regno != FPSR_REG
14658 && regno != FPCR_REG);
14660 if (code == 'w' || MMX_REG_P (x))
14661 code = 2;
14662 else if (code == 'b')
14663 code = 1;
14664 else if (code == 'k')
14665 code = 4;
14666 else if (code == 'q')
14667 code = 8;
14668 else if (code == 'y')
14669 code = 3;
14670 else if (code == 'h')
14671 code = 0;
14672 else if (code == 'x')
14673 code = 16;
14674 else if (code == 't')
14675 code = 32;
14676 else if (code == 'g')
14677 code = 64;
14678 else
14679 code = GET_MODE_SIZE (GET_MODE (x));
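/* At this point CODE holds the requested operand width in bytes, with the
   special values 0 for a QImode "high" register and 3 for an x87 stack
   register.  */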
14681 /* Irritatingly, AMD extended registers use a different naming convention
14682 from the normal registers: "r%d[bwd]" */
14683 if (REX_INT_REGNO_P (regno))
14685 gcc_assert (TARGET_64BIT);
14686 putc ('r', file);
14687 fprint_ul (file, regno - FIRST_REX_INT_REG + 8);
14688 switch (code)
14690 case 0:
14691 error ("extended registers have no high halves");
14692 break;
14693 case 1:
14694 putc ('b', file);
14695 break;
14696 case 2:
14697 putc ('w', file);
14698 break;
14699 case 4:
14700 putc ('d', file);
14701 break;
14702 case 8:
14703 /* no suffix */
14704 break;
14705 default:
14706 error ("unsupported operand size for extended register");
14707 break;
14709 return;
14712 reg = NULL;
14713 switch (code)
14715 case 3:
14716 if (STACK_TOP_P (x))
14718 reg = "st(0)";
14719 break;
14721 /* FALLTHRU */
14722 case 8:
14723 case 4:
14724 case 12:
14725 if (! ANY_FP_REG_P (x) && ! ANY_MASK_REG_P (x))
14726 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
14727 /* FALLTHRU */
14728 case 16:
14729 case 2:
14730 normal:
14731 reg = hi_reg_name[regno];
14732 break;
14733 case 1:
14734 if (regno >= ARRAY_SIZE (qi_reg_name))
14735 goto normal;
14736 reg = qi_reg_name[regno];
14737 break;
14738 case 0:
14739 if (regno >= ARRAY_SIZE (qi_high_reg_name))
14740 goto normal;
14741 reg = qi_high_reg_name[regno];
14742 break;
14743 case 32:
14744 if (SSE_REG_P (x))
14746 gcc_assert (!duplicated);
14747 putc ('y', file);
14748 fputs (hi_reg_name[regno] + 1, file);
14749 return;
14751 case 64:
14752 if (SSE_REG_P (x))
14754 gcc_assert (!duplicated);
14755 putc ('z', file);
14756 fputs (hi_reg_name[REGNO (x)] + 1, file);
14757 return;
14759 break;
14760 default:
14761 gcc_unreachable ();
14764 fputs (reg, file);
14765 if (duplicated)
14767 if (ASSEMBLER_DIALECT == ASM_ATT)
14768 fprintf (file, ", %%%s", reg);
14769 else
14770 fprintf (file, ", %s", reg);
14774 /* Locate some local-dynamic symbol still in use by this function
14775 so that we can print its name in some tls_local_dynamic_base
14776 pattern. */
14778 static int
14779 get_some_local_dynamic_name_1 (rtx *px, void *)
14781 rtx x = *px;
14783 if (GET_CODE (x) == SYMBOL_REF
14784 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
14786 cfun->machine->some_ld_name = XSTR (x, 0);
14787 return 1;
14790 return 0;
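/* Scan the current function for a local-dynamic TLS symbol, cache its name
   in cfun->machine->some_ld_name and return it, or return NULL if the
   function uses none.  */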
14793 static const char *
14794 get_some_local_dynamic_name (void)
14796 rtx_insn *insn;
14798 if (cfun->machine->some_ld_name)
14799 return cfun->machine->some_ld_name;
14801 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
14802 if (NONDEBUG_INSN_P (insn)
14803 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
14804 return cfun->machine->some_ld_name;
14806 return NULL;
14809 /* Meaning of CODE:
14810 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
14811 C -- print opcode suffix for set/cmov insn.
14812 c -- like C, but print reversed condition
14813 F,f -- likewise, but for floating-point.
14814 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
14815 otherwise nothing
14816 R -- print embedded rounding and sae.
14817 r -- print only sae.
14818 z -- print the opcode suffix for the size of the current operand.
14819 Z -- likewise, with special suffixes for x87 instructions.
14820 * -- print a star (in certain assembler syntax)
14821 A -- print an absolute memory reference.
14822 E -- print address with DImode register names if TARGET_64BIT.
14823 w -- print the operand as if it's a "word" (HImode) even if it isn't.
14824 s -- print a shift double count, followed by the assembler's argument
14825 delimiter.
14826 b -- print the QImode name of the register for the indicated operand.
14827 %b0 would print %al if operands[0] is reg 0.
14828 w -- likewise, print the HImode name of the register.
14829 k -- likewise, print the SImode name of the register.
14830 q -- likewise, print the DImode name of the register.
14831 x -- likewise, print the V4SFmode name of the register.
14832 t -- likewise, print the V8SFmode name of the register.
14833 g -- likewise, print the V16SFmode name of the register.
14834 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
14835 y -- print "st(0)" instead of "st" as a register.
14836 d -- print duplicated register operand for AVX instruction.
14837 D -- print condition for SSE cmp instruction.
14838 P -- if PIC, print an @PLT suffix.
14839 p -- print raw symbol name.
14840 X -- don't print any sort of PIC '@' suffix for a symbol.
14841 & -- print some in-use local-dynamic symbol name.
14842 H -- print a memory address offset by 8; used for sse high-parts
14843 Y -- print condition for XOP pcom* instruction.
14844 + -- print a branch hint as 'cs' or 'ds' prefix
14845 ; -- print a semicolon (after prefixes due to a bug in older gas).
14846 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
14847 @ -- print a segment register of thread base pointer load
14848 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
14851 void
14852 ix86_print_operand (FILE *file, rtx x, int code)
14854 if (code)
14856 switch (code)
14858 case 'A':
14859 switch (ASSEMBLER_DIALECT)
14861 case ASM_ATT:
14862 putc ('*', file);
14863 break;
14865 case ASM_INTEL:
14866 /* Intel syntax. For absolute addresses, registers should not
14867 be surrounded by brackets. */
14868 if (!REG_P (x))
14870 putc ('[', file);
14871 ix86_print_operand (file, x, 0);
14872 putc (']', file);
14873 return;
14875 break;
14877 default:
14878 gcc_unreachable ();
14881 ix86_print_operand (file, x, 0);
14882 return;
14884 case 'E':
14885 /* Wrap address in an UNSPEC to declare special handling. */
14886 if (TARGET_64BIT)
14887 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
14889 output_address (x);
14890 return;
14892 case 'L':
14893 if (ASSEMBLER_DIALECT == ASM_ATT)
14894 putc ('l', file);
14895 return;
14897 case 'W':
14898 if (ASSEMBLER_DIALECT == ASM_ATT)
14899 putc ('w', file);
14900 return;
14902 case 'B':
14903 if (ASSEMBLER_DIALECT == ASM_ATT)
14904 putc ('b', file);
14905 return;
14907 case 'Q':
14908 if (ASSEMBLER_DIALECT == ASM_ATT)
14909 putc ('l', file);
14910 return;
14912 case 'S':
14913 if (ASSEMBLER_DIALECT == ASM_ATT)
14914 putc ('s', file);
14915 return;
14917 case 'T':
14918 if (ASSEMBLER_DIALECT == ASM_ATT)
14919 putc ('t', file);
14920 return;
14922 case 'O':
14923 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14924 if (ASSEMBLER_DIALECT != ASM_ATT)
14925 return;
14927 switch (GET_MODE_SIZE (GET_MODE (x)))
14929 case 2:
14930 putc ('w', file);
14931 break;
14933 case 4:
14934 putc ('l', file);
14935 break;
14937 case 8:
14938 putc ('q', file);
14939 break;
14941 default:
14942 output_operand_lossage
14943 ("invalid operand size for operand code 'O'");
14944 return;
14947 putc ('.', file);
14948 #endif
14949 return;
14951 case 'z':
14952 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14954 /* Opcodes don't get size suffixes when using Intel syntax. */
14955 if (ASSEMBLER_DIALECT == ASM_INTEL)
14956 return;
14958 switch (GET_MODE_SIZE (GET_MODE (x)))
14960 case 1:
14961 putc ('b', file);
14962 return;
14964 case 2:
14965 putc ('w', file);
14966 return;
14968 case 4:
14969 putc ('l', file);
14970 return;
14972 case 8:
14973 putc ('q', file);
14974 return;
14976 default:
14977 output_operand_lossage
14978 ("invalid operand size for operand code 'z'");
14979 return;
14983 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14984 warning
14985 (0, "non-integer operand used with operand code 'z'");
14986 /* FALLTHRU */
14988 case 'Z':
14989 /* 387 opcodes don't get size suffixes when using Intel syntax. */
14990 if (ASSEMBLER_DIALECT == ASM_INTEL)
14991 return;
14993 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14995 switch (GET_MODE_SIZE (GET_MODE (x)))
14997 case 2:
14998 #ifdef HAVE_AS_IX86_FILDS
14999 putc ('s', file);
15000 #endif
15001 return;
15003 case 4:
15004 putc ('l', file);
15005 return;
15007 case 8:
15008 #ifdef HAVE_AS_IX86_FILDQ
15009 putc ('q', file);
15010 #else
15011 fputs ("ll", file);
15012 #endif
15013 return;
15015 default:
15016 break;
15019 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15021 /* 387 opcodes don't get size suffixes
15022 if the operands are registers. */
15023 if (STACK_REG_P (x))
15024 return;
15026 switch (GET_MODE_SIZE (GET_MODE (x)))
15028 case 4:
15029 putc ('s', file);
15030 return;
15032 case 8:
15033 putc ('l', file);
15034 return;
15036 case 12:
15037 case 16:
15038 putc ('t', file);
15039 return;
15041 default:
15042 break;
15045 else
15047 output_operand_lossage
15048 ("invalid operand type used with operand code 'Z'");
15049 return;
15052 output_operand_lossage
15053 ("invalid operand size for operand code 'Z'");
15054 return;
15056 case 'd':
15057 case 'b':
15058 case 'w':
15059 case 'k':
15060 case 'q':
15061 case 'h':
15062 case 't':
15063 case 'g':
15064 case 'y':
15065 case 'x':
15066 case 'X':
15067 case 'P':
15068 case 'p':
15069 break;
15071 case 's':
15072 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
15074 ix86_print_operand (file, x, 0);
15075 fputs (", ", file);
15077 return;
15079 case 'Y':
15080 switch (GET_CODE (x))
15082 case NE:
15083 fputs ("neq", file);
15084 break;
15085 case EQ:
15086 fputs ("eq", file);
15087 break;
15088 case GE:
15089 case GEU:
15090 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
15091 break;
15092 case GT:
15093 case GTU:
15094 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
15095 break;
15096 case LE:
15097 case LEU:
15098 fputs ("le", file);
15099 break;
15100 case LT:
15101 case LTU:
15102 fputs ("lt", file);
15103 break;
15104 case UNORDERED:
15105 fputs ("unord", file);
15106 break;
15107 case ORDERED:
15108 fputs ("ord", file);
15109 break;
15110 case UNEQ:
15111 fputs ("ueq", file);
15112 break;
15113 case UNGE:
15114 fputs ("nlt", file);
15115 break;
15116 case UNGT:
15117 fputs ("nle", file);
15118 break;
15119 case UNLE:
15120 fputs ("ule", file);
15121 break;
15122 case UNLT:
15123 fputs ("ult", file);
15124 break;
15125 case LTGT:
15126 fputs ("une", file);
15127 break;
15128 default:
15129 output_operand_lossage ("operand is not a condition code, "
15130 "invalid operand code 'Y'");
15131 return;
15133 return;
15135 case 'D':
15136 /* A little bit of braindamage here. The SSE compare instructions
15137 use completely different names for the comparisons than the
15138 fp conditional moves do. */
15139 switch (GET_CODE (x))
15141 case UNEQ:
15142 if (TARGET_AVX)
15144 fputs ("eq_us", file);
15145 break;
15147 case EQ:
15148 fputs ("eq", file);
15149 break;
15150 case UNLT:
15151 if (TARGET_AVX)
15153 fputs ("nge", file);
15154 break;
15156 case LT:
15157 fputs ("lt", file);
15158 break;
15159 case UNLE:
15160 if (TARGET_AVX)
15162 fputs ("ngt", file);
15163 break;
15165 case LE:
15166 fputs ("le", file);
15167 break;
15168 case UNORDERED:
15169 fputs ("unord", file);
15170 break;
15171 case LTGT:
15172 if (TARGET_AVX)
15174 fputs ("neq_oq", file);
15175 break;
15177 case NE:
15178 fputs ("neq", file);
15179 break;
15180 case GE:
15181 if (TARGET_AVX)
15183 fputs ("ge", file);
15184 break;
15186 case UNGE:
15187 fputs ("nlt", file);
15188 break;
15189 case GT:
15190 if (TARGET_AVX)
15192 fputs ("gt", file);
15193 break;
15195 case UNGT:
15196 fputs ("nle", file);
15197 break;
15198 case ORDERED:
15199 fputs ("ord", file);
15200 break;
15201 default:
15202 output_operand_lossage ("operand is not a condition code, "
15203 "invalid operand code 'D'");
15204 return;
15206 return;
15208 case 'F':
15209 case 'f':
15210 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15211 if (ASSEMBLER_DIALECT == ASM_ATT)
15212 putc ('.', file);
15213 #endif
15215 case 'C':
15216 case 'c':
15217 if (!COMPARISON_P (x))
15219 output_operand_lossage ("operand is not a condition code, "
15220 "invalid operand code '%c'", code);
15221 return;
15223 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
15224 code == 'c' || code == 'f',
15225 code == 'F' || code == 'f',
15226 file);
15227 return;
15229 case 'H':
15230 if (!offsettable_memref_p (x))
15232 output_operand_lossage ("operand is not an offsettable memory "
15233 "reference, invalid operand code 'H'");
15234 return;
15236 /* It doesn't actually matter what mode we use here, as we're
15237 only going to use this for printing. */
15238 x = adjust_address_nv (x, DImode, 8);
15239 /* Output 'qword ptr' for intel assembler dialect. */
15240 if (ASSEMBLER_DIALECT == ASM_INTEL)
15241 code = 'q';
15242 break;
15244 case 'K':
15245 gcc_assert (CONST_INT_P (x));
15247 if (INTVAL (x) & IX86_HLE_ACQUIRE)
15248 #ifdef HAVE_AS_IX86_HLE
15249 fputs ("xacquire ", file);
15250 #else
15251 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
15252 #endif
15253 else if (INTVAL (x) & IX86_HLE_RELEASE)
15254 #ifdef HAVE_AS_IX86_HLE
15255 fputs ("xrelease ", file);
15256 #else
15257 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
15258 #endif
15259 /* We do not want to print the value of the operand. */
15260 return;
15262 case 'N':
15263 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
15264 fputs ("{z}", file);
15265 return;
15267 case 'r':
15268 gcc_assert (CONST_INT_P (x));
15269 gcc_assert (INTVAL (x) == ROUND_SAE);
15271 if (ASSEMBLER_DIALECT == ASM_INTEL)
15272 fputs (", ", file);
15274 fputs ("{sae}", file);
15276 if (ASSEMBLER_DIALECT == ASM_ATT)
15277 fputs (", ", file);
15279 return;
15281 case 'R':
15282 gcc_assert (CONST_INT_P (x));
15284 if (ASSEMBLER_DIALECT == ASM_INTEL)
15285 fputs (", ", file);
15287 switch (INTVAL (x))
15289 case ROUND_NEAREST_INT | ROUND_SAE:
15290 fputs ("{rn-sae}", file);
15291 break;
15292 case ROUND_NEG_INF | ROUND_SAE:
15293 fputs ("{rd-sae}", file);
15294 break;
15295 case ROUND_POS_INF | ROUND_SAE:
15296 fputs ("{ru-sae}", file);
15297 break;
15298 case ROUND_ZERO | ROUND_SAE:
15299 fputs ("{rz-sae}", file);
15300 break;
15301 default:
15302 gcc_unreachable ();
15305 if (ASSEMBLER_DIALECT == ASM_ATT)
15306 fputs (", ", file);
15308 return;
15310 case '*':
15311 if (ASSEMBLER_DIALECT == ASM_ATT)
15312 putc ('*', file);
15313 return;
15315 case '&':
15317 const char *name = get_some_local_dynamic_name ();
15318 if (name == NULL)
15319 output_operand_lossage ("'%%&' used without any "
15320 "local dynamic TLS references");
15321 else
15322 assemble_name (file, name);
15323 return;
15326 case '+':
15328 rtx x;
15330 if (!optimize
15331 || optimize_function_for_size_p (cfun)
15332 || !TARGET_BRANCH_PREDICTION_HINTS)
15333 return;
15335 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
15336 if (x)
15338 int pred_val = XINT (x, 0);
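/* Only consider a hint when the predicted probability lies outside the
   45%-55% band around an even split, i.e. when the prediction is
   reasonably confident.  */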
15340 if (pred_val < REG_BR_PROB_BASE * 45 / 100
15341 || pred_val > REG_BR_PROB_BASE * 55 / 100)
15343 bool taken = pred_val > REG_BR_PROB_BASE / 2;
15344 bool cputaken
15345 = final_forward_branch_p (current_output_insn) == 0;
15347 /* Emit hints only in the case the default branch prediction
15348 heuristics would fail. */
15349 if (taken != cputaken)
15351 /* We use 3e (DS) prefix for taken branches and
15352 2e (CS) prefix for not taken branches. */
15353 if (taken)
15354 fputs ("ds ; ", file);
15355 else
15356 fputs ("cs ; ", file);
15360 return;
15363 case ';':
15364 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
15365 putc (';', file);
15366 #endif
15367 return;
15369 case '@':
15370 if (ASSEMBLER_DIALECT == ASM_ATT)
15371 putc ('%', file);
15373 /* The kernel uses a different segment register for performance
15374 reasons; this way a system call does not have to trash the userspace
15375 segment register, which would be expensive. */
15376 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
15377 fputs ("fs", file);
15378 else
15379 fputs ("gs", file);
15380 return;
15382 case '~':
15383 putc (TARGET_AVX2 ? 'i' : 'f', file);
15384 return;
15386 case '^':
15387 if (TARGET_64BIT && Pmode != word_mode)
15388 fputs ("addr32 ", file);
15389 return;
15391 default:
15392 output_operand_lossage ("invalid operand code '%c'", code);
15396 if (REG_P (x))
15397 print_reg (x, code, file);
15399 else if (MEM_P (x))
15401 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
15402 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
15403 && GET_MODE (x) != BLKmode)
15405 const char * size;
15406 switch (GET_MODE_SIZE (GET_MODE (x)))
15408 case 1: size = "BYTE"; break;
15409 case 2: size = "WORD"; break;
15410 case 4: size = "DWORD"; break;
15411 case 8: size = "QWORD"; break;
15412 case 12: size = "TBYTE"; break;
15413 case 16:
15414 if (GET_MODE (x) == XFmode)
15415 size = "TBYTE";
15416 else
15417 size = "XMMWORD";
15418 break;
15419 case 32: size = "YMMWORD"; break;
15420 case 64: size = "ZMMWORD"; break;
15421 default:
15422 gcc_unreachable ();
15425 /* Check for explicit size override (codes 'b', 'w', 'k',
15426 'q' and 'x') */
15427 if (code == 'b')
15428 size = "BYTE";
15429 else if (code == 'w')
15430 size = "WORD";
15431 else if (code == 'k')
15432 size = "DWORD";
15433 else if (code == 'q')
15434 size = "QWORD";
15435 else if (code == 'x')
15436 size = "XMMWORD";
15438 fputs (size, file);
15439 fputs (" PTR ", file);
15442 x = XEXP (x, 0);
15443 /* Avoid (%rip) for call operands. */
15444 if (CONSTANT_ADDRESS_P (x) && code == 'P'
15445 && !CONST_INT_P (x))
15446 output_addr_const (file, x);
15447 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
15448 output_operand_lossage ("invalid constraints for operand");
15449 else
15450 output_address (x);
15453 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
15455 REAL_VALUE_TYPE r;
15456 long l;
15458 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15459 REAL_VALUE_TO_TARGET_SINGLE (r, l);
15461 if (ASSEMBLER_DIALECT == ASM_ATT)
15462 putc ('$', file);
15463 /* Sign extend 32bit SFmode immediate to 8 bytes. */
15464 if (code == 'q')
15465 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
15466 (unsigned long long) (int) l);
15467 else
15468 fprintf (file, "0x%08x", (unsigned int) l);
15471 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
15473 REAL_VALUE_TYPE r;
15474 long l[2];
15476 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15477 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
15479 if (ASSEMBLER_DIALECT == ASM_ATT)
15480 putc ('$', file);
15481 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
15484 /* These float cases don't actually occur as immediate operands. */
15485 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
15487 char dstr[30];
15489 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
15490 fputs (dstr, file);
15493 else
15495 /* We have patterns that allow zero sets of memory, for instance.
15496 In 64-bit mode, we should probably support all 8-byte vectors,
15497 since we can in fact encode that into an immediate. */
15498 if (GET_CODE (x) == CONST_VECTOR)
15500 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
15501 x = const0_rtx;
15504 if (code != 'P' && code != 'p')
15506 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
15508 if (ASSEMBLER_DIALECT == ASM_ATT)
15509 putc ('$', file);
15511 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
15512 || GET_CODE (x) == LABEL_REF)
15514 if (ASSEMBLER_DIALECT == ASM_ATT)
15515 putc ('$', file);
15516 else
15517 fputs ("OFFSET FLAT:", file);
15520 if (CONST_INT_P (x))
15521 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
15522 else if (flag_pic || MACHOPIC_INDIRECT)
15523 output_pic_addr_const (file, x, code);
15524 else
15525 output_addr_const (file, x);
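/* Return true if CODE is a punctuation character that ix86_print_operand
   handles with no operand ('@', '*', '+', '&', ';', '~', '^').  */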
15529 static bool
15530 ix86_print_operand_punct_valid_p (unsigned char code)
15532 return (code == '@' || code == '*' || code == '+' || code == '&'
15533 || code == ';' || code == '~' || code == '^');
15536 /* Print a memory operand whose address is ADDR. */
15538 static void
15539 ix86_print_operand_address (FILE *file, rtx addr)
15541 struct ix86_address parts;
15542 rtx base, index, disp;
15543 int scale;
15544 int ok;
15545 bool vsib = false;
15546 int code = 0;
15548 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
15550 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15551 gcc_assert (parts.index == NULL_RTX);
15552 parts.index = XVECEXP (addr, 0, 1);
15553 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
15554 addr = XVECEXP (addr, 0, 0);
15555 vsib = true;
15557 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
15559 gcc_assert (TARGET_64BIT);
15560 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15561 code = 'q';
15563 else
15564 ok = ix86_decompose_address (addr, &parts);
15566 gcc_assert (ok);
15568 base = parts.base;
15569 index = parts.index;
15570 disp = parts.disp;
15571 scale = parts.scale;
15573 switch (parts.seg)
15575 case SEG_DEFAULT:
15576 break;
15577 case SEG_FS:
15578 case SEG_GS:
15579 if (ASSEMBLER_DIALECT == ASM_ATT)
15580 putc ('%', file);
15581 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
15582 break;
15583 default:
15584 gcc_unreachable ();
15587 /* Use one byte shorter RIP relative addressing for 64bit mode. */
15588 if (TARGET_64BIT && !base && !index)
15590 rtx symbol = disp;
15592 if (GET_CODE (disp) == CONST
15593 && GET_CODE (XEXP (disp, 0)) == PLUS
15594 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
15595 symbol = XEXP (XEXP (disp, 0), 0);
15597 if (GET_CODE (symbol) == LABEL_REF
15598 || (GET_CODE (symbol) == SYMBOL_REF
15599 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
15600 base = pc_rtx;
15602 if (!base && !index)
15604 /* A displacement-only address requires special attention. */
15606 if (CONST_INT_P (disp))
15608 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
15609 fputs ("ds:", file);
15610 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
15612 else if (flag_pic)
15613 output_pic_addr_const (file, disp, 0);
15614 else
15615 output_addr_const (file, disp);
15617 else
15619 /* Print SImode register names to force addr32 prefix. */
15620 if (SImode_address_operand (addr, VOIDmode))
15622 #ifdef ENABLE_CHECKING
15623 gcc_assert (TARGET_64BIT);
15624 switch (GET_CODE (addr))
15626 case SUBREG:
15627 gcc_assert (GET_MODE (addr) == SImode);
15628 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
15629 break;
15630 case ZERO_EXTEND:
15631 case AND:
15632 gcc_assert (GET_MODE (addr) == DImode);
15633 break;
15634 default:
15635 gcc_unreachable ();
15637 #endif
15638 gcc_assert (!code);
15639 code = 'k';
15641 else if (code == 0
15642 && TARGET_X32
15643 && disp
15644 && CONST_INT_P (disp)
15645 && INTVAL (disp) < -16*1024*1024)
15647 /* X32 runs in 64-bit mode, where displacement, DISP, in
15648 address DISP(%r64), is encoded as 32-bit immediate sign-
15649 extended from 32-bit to 64-bit. For -0x40000300(%r64),
15650 address is %r64 + 0xffffffffbffffd00. When %r64 <
15651 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
15652 which is invalid for x32. The correct address is %r64
15653 - 0x40000300 == 0xf7ffdd64. To properly encode
15654 -0x40000300(%r64) for x32, we zero-extend negative
15655 displacement by forcing addr32 prefix which truncates
15656 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
15657 zero-extend all negative displacements, including -1(%rsp).
15658 However, for small negative displacements, sign-extension
15659 won't cause overflow. We only zero-extend negative
15660 displacements if they < -16*1024*1024, which is also used
15661 to check legitimate address displacements for PIC. */
15662 code = 'k';
15665 if (ASSEMBLER_DIALECT == ASM_ATT)
15667 if (disp)
15669 if (flag_pic)
15670 output_pic_addr_const (file, disp, 0);
15671 else if (GET_CODE (disp) == LABEL_REF)
15672 output_asm_label (disp);
15673 else
15674 output_addr_const (file, disp);
15677 putc ('(', file);
15678 if (base)
15679 print_reg (base, code, file);
15680 if (index)
15682 putc (',', file);
15683 print_reg (index, vsib ? 0 : code, file);
15684 if (scale != 1 || vsib)
15685 fprintf (file, ",%d", scale);
15687 putc (')', file);
15689 else
15691 rtx offset = NULL_RTX;
15693 if (disp)
15695 /* Pull out the offset of a symbol; print any symbol itself. */
15696 if (GET_CODE (disp) == CONST
15697 && GET_CODE (XEXP (disp, 0)) == PLUS
15698 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
15700 offset = XEXP (XEXP (disp, 0), 1);
15701 disp = gen_rtx_CONST (VOIDmode,
15702 XEXP (XEXP (disp, 0), 0));
15705 if (flag_pic)
15706 output_pic_addr_const (file, disp, 0);
15707 else if (GET_CODE (disp) == LABEL_REF)
15708 output_asm_label (disp);
15709 else if (CONST_INT_P (disp))
15710 offset = disp;
15711 else
15712 output_addr_const (file, disp);
15715 putc ('[', file);
15716 if (base)
15718 print_reg (base, code, file);
15719 if (offset)
15721 if (INTVAL (offset) >= 0)
15722 putc ('+', file);
15723 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
15726 else if (offset)
15727 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
15728 else
15729 putc ('0', file);
15731 if (index)
15733 putc ('+', file);
15734 print_reg (index, vsib ? 0 : code, file);
15735 if (scale != 1 || vsib)
15736 fprintf (file, "*%d", scale);
15738 putc (']', file);
15743 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
15745 static bool
15746 i386_asm_output_addr_const_extra (FILE *file, rtx x)
15748 rtx op;
15750 if (GET_CODE (x) != UNSPEC)
15751 return false;
15753 op = XVECEXP (x, 0, 0);
15754 switch (XINT (x, 1))
15756 case UNSPEC_GOTTPOFF:
15757 output_addr_const (file, op);
15758 /* FIXME: This might be @TPOFF in Sun ld. */
15759 fputs ("@gottpoff", file);
15760 break;
15761 case UNSPEC_TPOFF:
15762 output_addr_const (file, op);
15763 fputs ("@tpoff", file);
15764 break;
15765 case UNSPEC_NTPOFF:
15766 output_addr_const (file, op);
15767 if (TARGET_64BIT)
15768 fputs ("@tpoff", file);
15769 else
15770 fputs ("@ntpoff", file);
15771 break;
15772 case UNSPEC_DTPOFF:
15773 output_addr_const (file, op);
15774 fputs ("@dtpoff", file);
15775 break;
15776 case UNSPEC_GOTNTPOFF:
15777 output_addr_const (file, op);
15778 if (TARGET_64BIT)
15779 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
15780 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
15781 else
15782 fputs ("@gotntpoff", file);
15783 break;
15784 case UNSPEC_INDNTPOFF:
15785 output_addr_const (file, op);
15786 fputs ("@indntpoff", file);
15787 break;
15788 #if TARGET_MACHO
15789 case UNSPEC_MACHOPIC_OFFSET:
15790 output_addr_const (file, op);
15791 putc ('-', file);
15792 machopic_output_function_base_name (file);
15793 break;
15794 #endif
15796 case UNSPEC_STACK_CHECK:
15798 int offset;
15800 gcc_assert (flag_split_stack);
15802 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
15803 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
15804 #else
15805 gcc_unreachable ();
15806 #endif
15808 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
15810 break;
15812 default:
15813 return false;
15816 return true;
15819 /* Split one or more double-mode RTL references into pairs of half-mode
15820 references. The RTL can be REG, offsettable MEM, integer constant, or
15821 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
15822 split and "num" is its length. lo_half and hi_half are output arrays
15823 that parallel "operands". */
15825 void
15826 split_double_mode (enum machine_mode mode, rtx operands[],
15827 int num, rtx lo_half[], rtx hi_half[])
15829 enum machine_mode half_mode;
15830 unsigned int byte;
15832 switch (mode)
15834 case TImode:
15835 half_mode = DImode;
15836 break;
15837 case DImode:
15838 half_mode = SImode;
15839 break;
15840 default:
15841 gcc_unreachable ();
15844 byte = GET_MODE_SIZE (half_mode);
15846 while (num--)
15848 rtx op = operands[num];
15850 /* simplify_subreg refuses to split volatile memory addresses,
15851 but we still have to handle them. */
15852 if (MEM_P (op))
15854 lo_half[num] = adjust_address (op, half_mode, 0);
15855 hi_half[num] = adjust_address (op, half_mode, byte);
15857 else
15859 lo_half[num] = simplify_gen_subreg (half_mode, op,
15860 GET_MODE (op) == VOIDmode
15861 ? mode : GET_MODE (op), 0);
15862 hi_half[num] = simplify_gen_subreg (half_mode, op,
15863 GET_MODE (op) == VOIDmode
15864 ? mode : GET_MODE (op), byte);
15869 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
15870 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
15871 is the expression of the binary operation. The output may either be
15872 emitted here, or returned to the caller, like all output_* functions.
15874 There is no guarantee that the operands are the same mode, as they
15875 might be within FLOAT or FLOAT_EXTEND expressions. */
15877 #ifndef SYSV386_COMPAT
15878 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
15879 wants to fix the assemblers because that causes incompatibility
15880 with gcc. No-one wants to fix gcc because that causes
15881 incompatibility with assemblers... You can use the option of
15882 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
15883 #define SYSV386_COMPAT 1
15884 #endif
15886 const char *
15887 output_387_binary_op (rtx insn, rtx *operands)
15889 static char buf[40];
15890 const char *p;
15891 const char *ssep;
15892 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
15894 #ifdef ENABLE_CHECKING
15895 /* Even if we do not want to check the inputs, this documents the input
15896 constraints, which helps in understanding the following code. */
15897 if (STACK_REG_P (operands[0])
15898 && ((REG_P (operands[1])
15899 && REGNO (operands[0]) == REGNO (operands[1])
15900 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
15901 || (REG_P (operands[2])
15902 && REGNO (operands[0]) == REGNO (operands[2])
15903 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
15904 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
15905 ; /* ok */
15906 else
15907 gcc_assert (is_sse);
15908 #endif
15910 switch (GET_CODE (operands[3]))
15912 case PLUS:
15913 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15914 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15915 p = "fiadd";
15916 else
15917 p = "fadd";
15918 ssep = "vadd";
15919 break;
15921 case MINUS:
15922 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15923 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15924 p = "fisub";
15925 else
15926 p = "fsub";
15927 ssep = "vsub";
15928 break;
15930 case MULT:
15931 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15932 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15933 p = "fimul";
15934 else
15935 p = "fmul";
15936 ssep = "vmul";
15937 break;
15939 case DIV:
15940 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15941 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15942 p = "fidiv";
15943 else
15944 p = "fdiv";
15945 ssep = "vdiv";
15946 break;
15948 default:
15949 gcc_unreachable ();
15952 if (is_sse)
15954 if (TARGET_AVX)
15956 strcpy (buf, ssep);
15957 if (GET_MODE (operands[0]) == SFmode)
15958 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
15959 else
15960 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
15962 else
15964 strcpy (buf, ssep + 1);
15965 if (GET_MODE (operands[0]) == SFmode)
15966 strcat (buf, "ss\t{%2, %0|%0, %2}");
15967 else
15968 strcat (buf, "sd\t{%2, %0|%0, %2}");
15970 return buf;
15972 strcpy (buf, p);
15974 switch (GET_CODE (operands[3]))
15976 case MULT:
15977 case PLUS:
15978 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
15980 rtx temp = operands[2];
15981 operands[2] = operands[1];
15982 operands[1] = temp;
15985 /* know operands[0] == operands[1]. */
15987 if (MEM_P (operands[2]))
15989 p = "%Z2\t%2";
15990 break;
15993 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
15995 if (STACK_TOP_P (operands[0]))
15996 /* How is it that we are storing to a dead operand[2]?
15997 Well, presumably operands[1] is dead too. We can't
15998 store the result to st(0) as st(0) gets popped on this
15999 instruction. Instead store to operands[2] (which I
16000 think has to be st(1)). st(1) will be popped later.
16001 gcc <= 2.8.1 didn't have this check and generated
16002 assembly code that the Unixware assembler rejected. */
16003 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
16004 else
16005 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16006 break;
16009 if (STACK_TOP_P (operands[0]))
16010 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16011 else
16012 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16013 break;
16015 case MINUS:
16016 case DIV:
16017 if (MEM_P (operands[1]))
16019 p = "r%Z1\t%1";
16020 break;
16023 if (MEM_P (operands[2]))
16025 p = "%Z2\t%2";
16026 break;
16029 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
16031 #if SYSV386_COMPAT
16032 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
16033 derived assemblers, confusingly reverse the direction of
16034 the operation for fsub{r} and fdiv{r} when the
16035 destination register is not st(0). The Intel assembler
16036 doesn't have this brain damage. Read !SYSV386_COMPAT to
16037 figure out what the hardware really does. */
16038 if (STACK_TOP_P (operands[0]))
16039 p = "{p\t%0, %2|rp\t%2, %0}";
16040 else
16041 p = "{rp\t%2, %0|p\t%0, %2}";
16042 #else
16043 if (STACK_TOP_P (operands[0]))
16044 /* As above for fmul/fadd, we can't store to st(0). */
16045 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
16046 else
16047 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16048 #endif
16049 break;
16052 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
16054 #if SYSV386_COMPAT
16055 if (STACK_TOP_P (operands[0]))
16056 p = "{rp\t%0, %1|p\t%1, %0}";
16057 else
16058 p = "{p\t%1, %0|rp\t%0, %1}";
16059 #else
16060 if (STACK_TOP_P (operands[0]))
16061 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
16062 else
16063 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
16064 #endif
16065 break;
16068 if (STACK_TOP_P (operands[0]))
16070 if (STACK_TOP_P (operands[1]))
16071 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16072 else
16073 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
16074 break;
16076 else if (STACK_TOP_P (operands[1]))
16078 #if SYSV386_COMPAT
16079 p = "{\t%1, %0|r\t%0, %1}";
16080 #else
16081 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
16082 #endif
16084 else
16086 #if SYSV386_COMPAT
16087 p = "{r\t%2, %0|\t%0, %2}";
16088 #else
16089 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16090 #endif
16092 break;
16094 default:
16095 gcc_unreachable ();
16098 strcat (buf, p);
16099 return buf;
16102 /* Check if a 256bit AVX register is referenced inside of EXP. */
16104 static int
16105 ix86_check_avx256_register (rtx *pexp, void *)
16107 rtx exp = *pexp;
16109 if (GET_CODE (exp) == SUBREG)
16110 exp = SUBREG_REG (exp);
16112 if (REG_P (exp)
16113 && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)))
16114 return 1;
16116 return 0;
16119 /* Return needed mode for entity in optimize_mode_switching pass. */
16121 static int
16122 ix86_avx_u128_mode_needed (rtx_insn *insn)
16124 if (CALL_P (insn))
16126 rtx link;
16128 /* The needed mode is set to AVX_U128_CLEAN if no 256bit modes are
16129 used in function arguments. */
16130 for (link = CALL_INSN_FUNCTION_USAGE (insn);
16131 link;
16132 link = XEXP (link, 1))
16134 if (GET_CODE (XEXP (link, 0)) == USE)
16136 rtx arg = XEXP (XEXP (link, 0), 0);
16138 if (ix86_check_avx256_register (&arg, NULL))
16139 return AVX_U128_DIRTY;
16143 return AVX_U128_CLEAN;
16146 /* Require DIRTY mode if a 256bit AVX register is referenced. Hardware
16147 changes state only when a 256bit register is written to, but we need
16148 to prevent the compiler from moving the optimal insertion point above
16149 an eventual read from a 256bit register. */
16150 if (for_each_rtx (&PATTERN (insn), ix86_check_avx256_register, NULL))
16151 return AVX_U128_DIRTY;
16153 return AVX_U128_ANY;
16156 /* Return mode that i387 must be switched into
16157 prior to the execution of insn. */
16159 static int
16160 ix86_i387_mode_needed (int entity, rtx_insn *insn)
16162 enum attr_i387_cw mode;
16164 /* The mode UNINITIALIZED is used to store the control word after a
16165 function call or ASM pattern. The mode ANY specifies that the function
16166 has no requirements on the control word and makes no changes in the
16167 bits we are interested in. */
16169 if (CALL_P (insn)
16170 || (NONJUMP_INSN_P (insn)
16171 && (asm_noperands (PATTERN (insn)) >= 0
16172 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
16173 return I387_CW_UNINITIALIZED;
16175 if (recog_memoized (insn) < 0)
16176 return I387_CW_ANY;
16178 mode = get_attr_i387_cw (insn);
16180 switch (entity)
16182 case I387_TRUNC:
16183 if (mode == I387_CW_TRUNC)
16184 return mode;
16185 break;
16187 case I387_FLOOR:
16188 if (mode == I387_CW_FLOOR)
16189 return mode;
16190 break;
16192 case I387_CEIL:
16193 if (mode == I387_CW_CEIL)
16194 return mode;
16195 break;
16197 case I387_MASK_PM:
16198 if (mode == I387_CW_MASK_PM)
16199 return mode;
16200 break;
16202 default:
16203 gcc_unreachable ();
16206 return I387_CW_ANY;
16209 /* Return mode that entity must be switched into
16210 prior to the execution of insn. */
16212 static int
16213 ix86_mode_needed (int entity, rtx_insn *insn)
16215 switch (entity)
16217 case AVX_U128:
16218 return ix86_avx_u128_mode_needed (insn);
16219 case I387_TRUNC:
16220 case I387_FLOOR:
16221 case I387_CEIL:
16222 case I387_MASK_PM:
16223 return ix86_i387_mode_needed (entity, insn);
16224 default:
16225 gcc_unreachable ();
16227 return 0;
16230 /* Check if a 256bit AVX register is referenced in stores. */
16232 static void
16233 ix86_check_avx256_stores (rtx dest, const_rtx, void *data)
16235 if (ix86_check_avx256_register (&dest, NULL))
16237 bool *used = (bool *) data;
16238 *used = true;
16242 /* Calculate mode of upper 128bit AVX registers after the insn. */
16244 static int
16245 ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
16247 rtx pat = PATTERN (insn);
16249 if (vzeroupper_operation (pat, VOIDmode)
16250 || vzeroall_operation (pat, VOIDmode))
16251 return AVX_U128_CLEAN;
16253 /* We know that the state is clean after a CALL insn if no 256bit
16254 registers are used in the function return register. */
16255 if (CALL_P (insn))
16257 bool avx_reg256_found = false;
16258 note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found);
16260 return avx_reg256_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
16263 /* Otherwise, return current mode. Remember that if insn
16264 references AVX 256bit registers, the mode was already changed
16265 to DIRTY from MODE_NEEDED. */
16266 return mode;
16269 /* Return the mode that an insn results in. */
16272 ix86_mode_after (int entity, int mode, rtx_insn *insn)
16274 switch (entity)
16276 case AVX_U128:
16277 return ix86_avx_u128_mode_after (mode, insn);
16278 case I387_TRUNC:
16279 case I387_FLOOR:
16280 case I387_CEIL:
16281 case I387_MASK_PM:
16282 return mode;
16283 default:
16284 gcc_unreachable ();
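/* Return the AVX_U128 mode assumed on entry to the current function,
   based on whether any incoming argument lives in a 256bit register.  */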
16288 static int
16289 ix86_avx_u128_mode_entry (void)
16291 tree arg;
16293 /* The entry mode is set to AVX_U128_DIRTY if any 256bit modes are
16294 used in function arguments. */
16295 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
16296 arg = TREE_CHAIN (arg))
16298 rtx incoming = DECL_INCOMING_RTL (arg);
16300 if (incoming && ix86_check_avx256_register (&incoming, NULL))
16301 return AVX_U128_DIRTY;
16304 return AVX_U128_CLEAN;
16307 /* Return a mode that ENTITY is assumed to be
16308 switched to at function entry. */
16310 static int
16311 ix86_mode_entry (int entity)
16313 switch (entity)
16315 case AVX_U128:
16316 return ix86_avx_u128_mode_entry ();
16317 case I387_TRUNC:
16318 case I387_FLOOR:
16319 case I387_CEIL:
16320 case I387_MASK_PM:
16321 return I387_CW_ANY;
16322 default:
16323 gcc_unreachable ();
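/* Likewise, return the AVX_U128 mode assumed at function exit, based on
   whether the return value lives in a 256bit register.  */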
16327 static int
16328 ix86_avx_u128_mode_exit (void)
16330 rtx reg = crtl->return_rtx;
16332 /* The exit mode is set to AVX_U128_DIRTY if any 256bit modes are
16333 used in the function return register. */
16334 if (reg && ix86_check_avx256_register (&reg, NULL))
16335 return AVX_U128_DIRTY;
16337 return AVX_U128_CLEAN;
16340 /* Return a mode that ENTITY is assumed to be
16341 switched to at function exit. */
16343 static int
16344 ix86_mode_exit (int entity)
16346 switch (entity)
16348 case AVX_U128:
16349 return ix86_avx_u128_mode_exit ();
16350 case I387_TRUNC:
16351 case I387_FLOOR:
16352 case I387_CEIL:
16353 case I387_MASK_PM:
16354 return I387_CW_ANY;
16355 default:
16356 gcc_unreachable ();
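/* Map priority index N to a mode for the mode-switching pass; every i386
   entity uses the identity mapping.  */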
16360 static int
16361 ix86_mode_priority (int, int n)
16363 return n;
16366 /* Output code to initialize control word copies used by trunc?f?i and
16367 rounding patterns. CURRENT_MODE is set to the current control word,
16368 while NEW_MODE is set to the new control word. */
16370 static void
16371 emit_i387_cw_initialization (int mode)
16373 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
16374 rtx new_mode;
16376 enum ix86_stack_slot slot;
16378 rtx reg = gen_reg_rtx (HImode);
16380 emit_insn (gen_x86_fnstcw_1 (stored_mode));
16381 emit_move_insn (reg, copy_rtx (stored_mode));
16383 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
16384 || optimize_insn_for_size_p ())
16386 switch (mode)
16388 case I387_CW_TRUNC:
16389 /* round toward zero (truncate) */
16390 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
16391 slot = SLOT_CW_TRUNC;
16392 break;
16394 case I387_CW_FLOOR:
16395 /* round down toward -oo */
16396 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16397 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
16398 slot = SLOT_CW_FLOOR;
16399 break;
16401 case I387_CW_CEIL:
16402 /* round up toward +oo */
16403 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16404 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
16405 slot = SLOT_CW_CEIL;
16406 break;
16408 case I387_CW_MASK_PM:
16409 /* mask precision exception for nearbyint() */
16410 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16411 slot = SLOT_CW_MASK_PM;
16412 break;
16414 default:
16415 gcc_unreachable ();
16418 else
16420 switch (mode)
16422 case I387_CW_TRUNC:
16423 /* round toward zero (truncate) */
16424 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
16425 slot = SLOT_CW_TRUNC;
16426 break;
16428 case I387_CW_FLOOR:
16429 /* round down toward -oo */
16430 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
16431 slot = SLOT_CW_FLOOR;
16432 break;
16434 case I387_CW_CEIL:
16435 /* round up toward +oo */
16436 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
16437 slot = SLOT_CW_CEIL;
16438 break;
16440 case I387_CW_MASK_PM:
16441 /* mask precision exception for nearbyint() */
16442 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16443 slot = SLOT_CW_MASK_PM;
16444 break;
16446 default:
16447 gcc_unreachable ();
16451 gcc_assert (slot < MAX_386_STACK_LOCALS);
16453 new_mode = assign_386_stack_local (HImode, slot);
16454 emit_move_insn (new_mode, reg);
16457 /* Emit vzeroupper. */
16459 void
16460 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
16462 int i;
16464 /* Cancel automatic vzeroupper insertion if there are
16465 live call-saved SSE registers at the insertion point. */
16467 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
16468 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16469 return;
16471 if (TARGET_64BIT)
16472 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
16473 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16474 return;
16476 emit_insn (gen_avx_vzeroupper ());
16481 /* Generate one or more insns to set ENTITY to MODE. HARD_REG_LIVE
16482 is the set of hard registers live at the point where the insn(s)
16483 are to be inserted. */
16485 static void
16486 ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
16487 HARD_REG_SET regs_live)
16489 switch (entity)
16491 case AVX_U128:
16492 if (mode == AVX_U128_CLEAN)
16493 ix86_avx_emit_vzeroupper (regs_live);
16494 break;
16495 case I387_TRUNC:
16496 case I387_FLOOR:
16497 case I387_CEIL:
16498 case I387_MASK_PM:
16499 if (mode != I387_CW_ANY
16500 && mode != I387_CW_UNINITIALIZED)
16501 emit_i387_cw_initialization (mode);
16502 break;
16503 default:
16504 gcc_unreachable ();
16508 /* Output code for INSN to convert a float to a signed int. OPERANDS
16509 are the insn operands. The output may be [HSD]Imode and the input
16510 operand may be [SDX]Fmode. */
16512 const char *
16513 output_fix_trunc (rtx insn, rtx *operands, bool fisttp)
16515 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
16516 int dimode_p = GET_MODE (operands[0]) == DImode;
16517 int round_mode = get_attr_i387_cw (insn);
16519 /* Jump through a hoop or two for DImode, since the hardware has no
16520 non-popping instruction. We used to do this a different way, but
16521 that was somewhat fragile and broke with post-reload splitters. */
16522 if ((dimode_p || fisttp) && !stack_top_dies)
16523 output_asm_insn ("fld\t%y1", operands);
16525 gcc_assert (STACK_TOP_P (operands[1]));
16526 gcc_assert (MEM_P (operands[0]));
16527 gcc_assert (GET_MODE (operands[1]) != TFmode);
16529 if (fisttp)
16530 output_asm_insn ("fisttp%Z0\t%0", operands);
16531 else
16533 if (round_mode != I387_CW_ANY)
16534 output_asm_insn ("fldcw\t%3", operands);
16535 if (stack_top_dies || dimode_p)
16536 output_asm_insn ("fistp%Z0\t%0", operands);
16537 else
16538 output_asm_insn ("fist%Z0\t%0", operands);
16539 if (round_mode != I387_CW_ANY)
16540 output_asm_insn ("fldcw\t%2", operands);
16543 return "";
16546 /* Output code for x87 ffreep insn. The OPNO argument, which may only
16547 have the values zero or one, indicates the ffreep insn's operand
16548 from the OPERANDS array. */
16550 static const char *
16551 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
16553 if (TARGET_USE_FFREEP)
16554 #ifdef HAVE_AS_IX86_FFREEP
16555 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
16556 #else
16558 static char retval[32];
16559 int regno = REGNO (operands[opno]);
16561 gcc_assert (STACK_REGNO_P (regno));
16563 regno -= FIRST_STACK_REG;
16565 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
16566 return retval;
16568 #endif
16570 return opno ? "fstp\t%y1" : "fstp\t%y0";
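/* A worked example of the raw-byte fallback above (a sketch, not taken
   from this file): for %st(1), regno becomes 1 and the format string
   expands to ASM_SHORT "0xc1df"; stored little-endian those are the
   bytes 0xdf 0xc1, i.e. the encoding of "ffreep %st(1)".  */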
16574 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
16575 should be used. UNORDERED_P is true when fucom should be used. */
16577 const char *
16578 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
16580 int stack_top_dies;
16581 rtx cmp_op0, cmp_op1;
16582 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
16584 if (eflags_p)
16586 cmp_op0 = operands[0];
16587 cmp_op1 = operands[1];
16589 else
16591 cmp_op0 = operands[1];
16592 cmp_op1 = operands[2];
16595 if (is_sse)
16597 if (GET_MODE (operands[0]) == SFmode)
16598 if (unordered_p)
16599 return "%vucomiss\t{%1, %0|%0, %1}";
16600 else
16601 return "%vcomiss\t{%1, %0|%0, %1}";
16602 else
16603 if (unordered_p)
16604 return "%vucomisd\t{%1, %0|%0, %1}";
16605 else
16606 return "%vcomisd\t{%1, %0|%0, %1}";
16609 gcc_assert (STACK_TOP_P (cmp_op0));
16611 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
16613 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
16615 if (stack_top_dies)
16617 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
16618 return output_387_ffreep (operands, 1);
16620 else
16621 return "ftst\n\tfnstsw\t%0";
16624 if (STACK_REG_P (cmp_op1)
16625 && stack_top_dies
16626 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
16627 && REGNO (cmp_op1) != FIRST_STACK_REG)
16629 /* If the top of the 387 stack dies, and the other operand
16630 is also a stack register that dies, then this must be a
16631 `fcompp' float compare. */
16633 if (eflags_p)
16635 /* There is no double popping fcomi variant. Fortunately,
16636 eflags is immune from the fstp's cc clobbering. */
16637 if (unordered_p)
16638 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
16639 else
16640 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
16641 return output_387_ffreep (operands, 0);
16643 else
16645 if (unordered_p)
16646 return "fucompp\n\tfnstsw\t%0";
16647 else
16648 return "fcompp\n\tfnstsw\t%0";
16651 else
16653 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
16655 static const char * const alt[16] =
16657 "fcom%Z2\t%y2\n\tfnstsw\t%0",
16658 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
16659 "fucom%Z2\t%y2\n\tfnstsw\t%0",
16660 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
16662 "ficom%Z2\t%y2\n\tfnstsw\t%0",
16663 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
16664 NULL,
16665 NULL,
16667 "fcomi\t{%y1, %0|%0, %y1}",
16668 "fcomip\t{%y1, %0|%0, %y1}",
16669 "fucomi\t{%y1, %0|%0, %y1}",
16670 "fucomip\t{%y1, %0|%0, %y1}",
16672 NULL,
16673 NULL,
16674 NULL,
16675 NULL
16678 int mask;
16679 const char *ret;
16681 mask = eflags_p << 3;
16682 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
16683 mask |= unordered_p << 1;
16684 mask |= stack_top_dies;
16686 gcc_assert (mask < 16);
16687 ret = alt[mask];
16688 gcc_assert (ret);
16690 return ret;
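/* Worked example of the encoding above (illustrative): a register-register
   "fucomip" compare has eflags_p = 1, a non-integer second operand,
   unordered_p = 1 and stack_top_dies = 1, so
   mask = (1 << 3) | (0 << 2) | (1 << 1) | 1 = 11,
   and alt[11] is "fucomip\t{%y1, %0|%0, %y1}".  */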
16694 void
16695 ix86_output_addr_vec_elt (FILE *file, int value)
16697 const char *directive = ASM_LONG;
16699 #ifdef ASM_QUAD
16700 if (TARGET_LP64)
16701 directive = ASM_QUAD;
16702 #else
16703 gcc_assert (!TARGET_64BIT);
16704 #endif
16706 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
16709 void
16710 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
16712 const char *directive = ASM_LONG;
16714 #ifdef ASM_QUAD
16715 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
16716 directive = ASM_QUAD;
16717 #else
16718 gcc_assert (!TARGET_64BIT);
16719 #endif
16720 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
16721 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
16722 fprintf (file, "%s%s%d-%s%d\n",
16723 directive, LPREFIX, value, LPREFIX, rel);
16724 else if (HAVE_AS_GOTOFF_IN_DATA)
16725 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
16726 #if TARGET_MACHO
16727 else if (TARGET_MACHO)
16729 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
16730 machopic_output_function_base_name (file);
16731 putc ('\n', file);
16733 #endif
16734 else
16735 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
16736 GOT_SYMBOL_NAME, LPREFIX, value);
16739 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
16740 for the target. */
16742 void
16743 ix86_expand_clear (rtx dest)
16745 rtx tmp;
16747 /* We play register width games, which are only valid after reload. */
16748 gcc_assert (reload_completed);
16750 /* Avoid HImode and its attendant prefix byte. */
16751 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
16752 dest = gen_rtx_REG (SImode, REGNO (dest));
16753 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
16755 if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
16757 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16758 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
16761 emit_insn (tmp);
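/* For example, when TARGET_USE_MOV0 is clear, or whenever optimizing for
   size, clearing %eax goes through the PARALLEL above and is normally
   emitted as "xorl %eax, %eax" (which clobbers the flags); the plain SET
   form instead assembles to "movl $0, %eax".  */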
16764 /* X is an unchanging MEM. If it is a constant pool reference, return
16765 the constant pool rtx, else NULL. */
16768 maybe_get_pool_constant (rtx x)
16770 x = ix86_delegitimize_address (XEXP (x, 0));
16772 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
16773 return get_pool_constant (x);
16775 return NULL_RTX;
16778 void
16779 ix86_expand_move (enum machine_mode mode, rtx operands[])
16781 rtx op0, op1;
16782 enum tls_model model;
16784 op0 = operands[0];
16785 op1 = operands[1];
16787 if (GET_CODE (op1) == SYMBOL_REF)
16789 rtx tmp;
16791 model = SYMBOL_REF_TLS_MODEL (op1);
16792 if (model)
16794 op1 = legitimize_tls_address (op1, model, true);
16795 op1 = force_operand (op1, op0);
16796 if (op1 == op0)
16797 return;
16798 op1 = convert_to_mode (mode, op1, 1);
16800 else if ((tmp = legitimize_pe_coff_symbol (op1, false)) != NULL_RTX)
16801 op1 = tmp;
16803 else if (GET_CODE (op1) == CONST
16804 && GET_CODE (XEXP (op1, 0)) == PLUS
16805 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
16807 rtx addend = XEXP (XEXP (op1, 0), 1);
16808 rtx symbol = XEXP (XEXP (op1, 0), 0);
16809 rtx tmp;
16811 model = SYMBOL_REF_TLS_MODEL (symbol);
16812 if (model)
16813 tmp = legitimize_tls_address (symbol, model, true);
16814 else
16815 tmp = legitimize_pe_coff_symbol (symbol, true);
16817 if (tmp)
16819 tmp = force_operand (tmp, NULL);
16820 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
16821 op0, 1, OPTAB_DIRECT);
16822 if (tmp == op0)
16823 return;
16824 op1 = convert_to_mode (mode, tmp, 1);
16828 if ((flag_pic || MACHOPIC_INDIRECT)
16829 && symbolic_operand (op1, mode))
16831 if (TARGET_MACHO && !TARGET_64BIT)
16833 #if TARGET_MACHO
16834 /* dynamic-no-pic */
16835 if (MACHOPIC_INDIRECT)
16837 rtx temp = ((reload_in_progress
16838 || ((op0 && REG_P (op0))
16839 && mode == Pmode))
16840 ? op0 : gen_reg_rtx (Pmode));
16841 op1 = machopic_indirect_data_reference (op1, temp);
16842 if (MACHOPIC_PURE)
16843 op1 = machopic_legitimize_pic_address (op1, mode,
16844 temp == op1 ? 0 : temp);
16846 if (op0 != op1 && GET_CODE (op0) != MEM)
16848 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
16849 emit_insn (insn);
16850 return;
16852 if (GET_CODE (op0) == MEM)
16853 op1 = force_reg (Pmode, op1);
16854 else
16856 rtx temp = op0;
16857 if (GET_CODE (temp) != REG)
16858 temp = gen_reg_rtx (Pmode);
16859 temp = legitimize_pic_address (op1, temp);
16860 if (temp == op0)
16861 return;
16862 op1 = temp;
16864 /* dynamic-no-pic */
16865 #endif
16867 else
16869 if (MEM_P (op0))
16870 op1 = force_reg (mode, op1);
16871 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
16873 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
16874 op1 = legitimize_pic_address (op1, reg);
16875 if (op0 == op1)
16876 return;
16877 op1 = convert_to_mode (mode, op1, 1);
16881 else
16883 if (MEM_P (op0)
16884 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
16885 || !push_operand (op0, mode))
16886 && MEM_P (op1))
16887 op1 = force_reg (mode, op1);
16889 if (push_operand (op0, mode)
16890 && ! general_no_elim_operand (op1, mode))
16891 op1 = copy_to_mode_reg (mode, op1);
16893 /* Force large constants in 64bit compilation into a register
16894 to get them CSEed. */
16895 if (can_create_pseudo_p ()
16896 && (mode == DImode) && TARGET_64BIT
16897 && immediate_operand (op1, mode)
16898 && !x86_64_zext_immediate_operand (op1, VOIDmode)
16899 && !register_operand (op0, mode)
16900 && optimize)
16901 op1 = copy_to_mode_reg (mode, op1);
16903 if (can_create_pseudo_p ()
16904 && FLOAT_MODE_P (mode)
16905 && GET_CODE (op1) == CONST_DOUBLE)
16907 /* If we are loading a floating point constant to a register,
16908 force the value to memory now, since we'll get better code
16909 out the back end. */
16911 op1 = validize_mem (force_const_mem (mode, op1));
16912 if (!register_operand (op0, mode))
16914 rtx temp = gen_reg_rtx (mode);
16915 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
16916 emit_move_insn (op0, temp);
16917 return;
16922 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
16925 void
16926 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
16928 rtx op0 = operands[0], op1 = operands[1];
16929 unsigned int align = GET_MODE_ALIGNMENT (mode);
16931 if (push_operand (op0, VOIDmode))
16932 op0 = emit_move_resolve_push (mode, op0);
16934 /* Force constants other than zero into memory. We do not know how
16935 the instructions used to build constants modify the upper 64 bits
16936 of the register; once we have that information we may be able
16937 to handle some of them more efficiently. */
16938 if (can_create_pseudo_p ()
16939 && register_operand (op0, mode)
16940 && (CONSTANT_P (op1)
16941 || (GET_CODE (op1) == SUBREG
16942 && CONSTANT_P (SUBREG_REG (op1))))
16943 && !standard_sse_constant_p (op1))
16944 op1 = validize_mem (force_const_mem (mode, op1));
16946 /* We need to check memory alignment for SSE mode since an attribute
16947 can make operands unaligned. */
16948 if (can_create_pseudo_p ()
16949 && SSE_REG_MODE_P (mode)
16950 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
16951 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
16953 rtx tmp[2];
16955 /* ix86_expand_vector_move_misalign() does not like constants ... */
16956 if (CONSTANT_P (op1)
16957 || (GET_CODE (op1) == SUBREG
16958 && CONSTANT_P (SUBREG_REG (op1))))
16959 op1 = validize_mem (force_const_mem (mode, op1));
16961 /* ... nor both arguments in memory. */
16962 if (!register_operand (op0, mode)
16963 && !register_operand (op1, mode))
16964 op1 = force_reg (mode, op1);
16966 tmp[0] = op0; tmp[1] = op1;
16967 ix86_expand_vector_move_misalign (mode, tmp);
16968 return;
16971 /* Make operand1 a register if it isn't already. */
16972 if (can_create_pseudo_p ()
16973 && !register_operand (op0, mode)
16974 && !register_operand (op1, mode))
16976 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
16977 return;
16980 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
16983 /* Split 32-byte AVX unaligned load and store if needed. */
16985 static void
16986 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
16988 rtx m;
16989 rtx (*extract) (rtx, rtx, rtx);
16990 rtx (*load_unaligned) (rtx, rtx);
16991 rtx (*store_unaligned) (rtx, rtx);
16992 enum machine_mode mode;
16994 switch (GET_MODE (op0))
16996 default:
16997 gcc_unreachable ();
16998 case V32QImode:
16999 extract = gen_avx_vextractf128v32qi;
17000 load_unaligned = gen_avx_loaddquv32qi;
17001 store_unaligned = gen_avx_storedquv32qi;
17002 mode = V16QImode;
17003 break;
17004 case V8SFmode:
17005 extract = gen_avx_vextractf128v8sf;
17006 load_unaligned = gen_avx_loadups256;
17007 store_unaligned = gen_avx_storeups256;
17008 mode = V4SFmode;
17009 break;
17010 case V4DFmode:
17011 extract = gen_avx_vextractf128v4df;
17012 load_unaligned = gen_avx_loadupd256;
17013 store_unaligned = gen_avx_storeupd256;
17014 mode = V2DFmode;
17015 break;
17018 if (MEM_P (op1))
17020 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
17022 rtx r = gen_reg_rtx (mode);
17023 m = adjust_address (op1, mode, 0);
17024 emit_move_insn (r, m);
17025 m = adjust_address (op1, mode, 16);
17026 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
17027 emit_move_insn (op0, r);
17029 /* Normal *mov<mode>_internal pattern will handle
17030 unaligned loads just fine if misaligned_operand
17031 is true, and without the UNSPEC it can be combined
17032 with arithmetic instructions. */
17033 else if (misaligned_operand (op1, GET_MODE (op1)))
17034 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17035 else
17036 emit_insn (load_unaligned (op0, op1));
17038 else if (MEM_P (op0))
17040 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE)
17042 m = adjust_address (op0, mode, 0);
17043 emit_insn (extract (m, op1, const0_rtx));
17044 m = adjust_address (op0, mode, 16);
17045 emit_insn (extract (m, op1, const1_rtx));
17047 else
17048 emit_insn (store_unaligned (op0, op1));
17050 else
17051 gcc_unreachable ();
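/* An intrinsics-level sketch of the split load path above (assumptions:
   <immintrin.h> names, an unaligned "float *p"; illustrative only):

       __m128 lo = _mm_loadu_ps (p);
       __m128 hi = _mm_loadu_ps (p + 4);
       __m256 v  = _mm256_insertf128_ps (_mm256_castps128_ps256 (lo), hi, 1);

   The split store path mirrors this with two vextractf128 halves.  */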
17054 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
17055 straight to ix86_expand_vector_move. */
17056 /* Code generation for scalar reg-reg moves of single and double precision data:
17057 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
17058 movaps reg, reg
17059 else
17060 movss reg, reg
17061 if (x86_sse_partial_reg_dependency == true)
17062 movapd reg, reg
17063 else
17064 movsd reg, reg
17066 Code generation for scalar loads of double precision data:
17067 if (x86_sse_split_regs == true)
17068 movlpd mem, reg (gas syntax)
17069 else
17070 movsd mem, reg
17072 Code generation for unaligned packed loads of single precision data
17073 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
17074 if (x86_sse_unaligned_move_optimal)
17075 movups mem, reg
17077 if (x86_sse_partial_reg_dependency == true)
17079 xorps reg, reg
17080 movlps mem, reg
17081 movhps mem+8, reg
17083 else
17085 movlps mem, reg
17086 movhps mem+8, reg
17089 Code generation for unaligned packed loads of double precision data
17090 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
17091 if (x86_sse_unaligned_move_optimal)
17092 movupd mem, reg
17094 if (x86_sse_split_regs == true)
17096 movlpd mem, reg
17097 movhpd mem+8, reg
17099 else
17101 movsd mem, reg
17102 movhpd mem+8, reg
17106 void
17107 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
17109 rtx op0, op1, orig_op0 = NULL_RTX, m;
17110 rtx (*load_unaligned) (rtx, rtx);
17111 rtx (*store_unaligned) (rtx, rtx);
17113 op0 = operands[0];
17114 op1 = operands[1];
17116 if (GET_MODE_SIZE (mode) == 64)
17118 switch (GET_MODE_CLASS (mode))
17120 case MODE_VECTOR_INT:
17121 case MODE_INT:
17122 if (GET_MODE (op0) != V16SImode)
17124 if (!MEM_P (op0))
17126 orig_op0 = op0;
17127 op0 = gen_reg_rtx (V16SImode);
17129 else
17130 op0 = gen_lowpart (V16SImode, op0);
17132 op1 = gen_lowpart (V16SImode, op1);
17133 /* FALLTHRU */
17135 case MODE_VECTOR_FLOAT:
17136 switch (GET_MODE (op0))
17138 default:
17139 gcc_unreachable ();
17140 case V16SImode:
17141 load_unaligned = gen_avx512f_loaddquv16si;
17142 store_unaligned = gen_avx512f_storedquv16si;
17143 break;
17144 case V16SFmode:
17145 load_unaligned = gen_avx512f_loadups512;
17146 store_unaligned = gen_avx512f_storeups512;
17147 break;
17148 case V8DFmode:
17149 load_unaligned = gen_avx512f_loadupd512;
17150 store_unaligned = gen_avx512f_storeupd512;
17151 break;
17154 if (MEM_P (op1))
17155 emit_insn (load_unaligned (op0, op1));
17156 else if (MEM_P (op0))
17157 emit_insn (store_unaligned (op0, op1));
17158 else
17159 gcc_unreachable ();
17160 if (orig_op0)
17161 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17162 break;
17164 default:
17165 gcc_unreachable ();
17168 return;
17171 if (TARGET_AVX
17172 && GET_MODE_SIZE (mode) == 32)
17174 switch (GET_MODE_CLASS (mode))
17176 case MODE_VECTOR_INT:
17177 case MODE_INT:
17178 if (GET_MODE (op0) != V32QImode)
17180 if (!MEM_P (op0))
17182 orig_op0 = op0;
17183 op0 = gen_reg_rtx (V32QImode);
17185 else
17186 op0 = gen_lowpart (V32QImode, op0);
17188 op1 = gen_lowpart (V32QImode, op1);
17189 /* FALLTHRU */
17191 case MODE_VECTOR_FLOAT:
17192 ix86_avx256_split_vector_move_misalign (op0, op1);
17193 if (orig_op0)
17194 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17195 break;
17197 default:
17198 gcc_unreachable ();
17201 return;
17204 if (MEM_P (op1))
17206 /* Normal *mov<mode>_internal pattern will handle
17207 unaligned loads just fine if misaligned_operand
17208 is true, and without the UNSPEC it can be combined
17209 with arithmetic instructions. */
17210 if (TARGET_AVX
17211 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
17212 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
17213 && misaligned_operand (op1, GET_MODE (op1)))
17214 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17215 /* ??? If we have typed data, then it would appear that using
17216 movdqu is the only way to get unaligned data loaded with
17217 integer type. */
17218 else if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17220 if (GET_MODE (op0) != V16QImode)
17222 orig_op0 = op0;
17223 op0 = gen_reg_rtx (V16QImode);
17225 op1 = gen_lowpart (V16QImode, op1);
17226 /* We will eventually emit movups based on insn attributes. */
17227 emit_insn (gen_sse2_loaddquv16qi (op0, op1));
17228 if (orig_op0)
17229 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17231 else if (TARGET_SSE2 && mode == V2DFmode)
17233 rtx zero;
17235 if (TARGET_AVX
17236 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17237 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17238 || optimize_insn_for_size_p ())
17240 /* We will eventually emit movups based on insn attributes. */
17241 emit_insn (gen_sse2_loadupd (op0, op1));
17242 return;
17245 /* When SSE registers are split into halves, we can avoid
17246 writing to the top half twice. */
17247 if (TARGET_SSE_SPLIT_REGS)
17249 emit_clobber (op0);
17250 zero = op0;
17252 else
17254 /* ??? Not sure about the best option for the Intel chips.
17255 The following would seem to satisfy; the register is
17256 entirely cleared, breaking the dependency chain. We
17257 then store to the upper half, with a dependency depth
17258 of one. A rumor has it that Intel recommends two movsd
17259 followed by an unpacklpd, but this is unconfirmed. And
17260 given that the dependency depth of the unpacklpd would
17261 still be one, I'm not sure why this would be better. */
17262 zero = CONST0_RTX (V2DFmode);
17265 m = adjust_address (op1, DFmode, 0);
17266 emit_insn (gen_sse2_loadlpd (op0, zero, m));
17267 m = adjust_address (op1, DFmode, 8);
17268 emit_insn (gen_sse2_loadhpd (op0, op0, m));
17270 else
17272 rtx t;
17274 if (TARGET_AVX
17275 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17276 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17277 || optimize_insn_for_size_p ())
17279 if (GET_MODE (op0) != V4SFmode)
17281 orig_op0 = op0;
17282 op0 = gen_reg_rtx (V4SFmode);
17284 op1 = gen_lowpart (V4SFmode, op1);
17285 emit_insn (gen_sse_loadups (op0, op1));
17286 if (orig_op0)
17287 emit_move_insn (orig_op0,
17288 gen_lowpart (GET_MODE (orig_op0), op0));
17289 return;
17292 if (mode != V4SFmode)
17293 t = gen_reg_rtx (V4SFmode);
17294 else
17295 t = op0;
17297 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
17298 emit_move_insn (t, CONST0_RTX (V4SFmode));
17299 else
17300 emit_clobber (t);
17302 m = adjust_address (op1, V2SFmode, 0);
17303 emit_insn (gen_sse_loadlps (t, t, m));
17304 m = adjust_address (op1, V2SFmode, 8);
17305 emit_insn (gen_sse_loadhps (t, t, m));
17306 if (mode != V4SFmode)
17307 emit_move_insn (op0, gen_lowpart (mode, t));
17310 else if (MEM_P (op0))
17312 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17314 op0 = gen_lowpart (V16QImode, op0);
17315 op1 = gen_lowpart (V16QImode, op1);
17316 /* We will eventually emit movups based on insn attributes. */
17317 emit_insn (gen_sse2_storedquv16qi (op0, op1));
17319 else if (TARGET_SSE2 && mode == V2DFmode)
17321 if (TARGET_AVX
17322 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17323 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17324 || optimize_insn_for_size_p ())
17325 /* We will eventually emit movups based on insn attributes. */
17326 emit_insn (gen_sse2_storeupd (op0, op1));
17327 else
17329 m = adjust_address (op0, DFmode, 0);
17330 emit_insn (gen_sse2_storelpd (m, op1));
17331 m = adjust_address (op0, DFmode, 8);
17332 emit_insn (gen_sse2_storehpd (m, op1));
17335 else
17337 if (mode != V4SFmode)
17338 op1 = gen_lowpart (V4SFmode, op1);
17340 if (TARGET_AVX
17341 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17342 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17343 || optimize_insn_for_size_p ())
17345 op0 = gen_lowpart (V4SFmode, op0);
17346 emit_insn (gen_sse_storeups (op0, op1));
17348 else
17350 m = adjust_address (op0, V2SFmode, 0);
17351 emit_insn (gen_sse_storelps (m, op1));
17352 m = adjust_address (op0, V2SFmode, 8);
17353 emit_insn (gen_sse_storehps (m, op1));
17357 else
17358 gcc_unreachable ();
17361 /* Helper function of ix86_fixup_binary_operands to canonicalize
17362 operand order. Returns true if the operands should be swapped. */
17364 static bool
17365 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
17366 rtx operands[])
17368 rtx dst = operands[0];
17369 rtx src1 = operands[1];
17370 rtx src2 = operands[2];
17372 /* If the operation is not commutative, we can't do anything. */
17373 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
17374 return false;
17376 /* Highest priority is that src1 should match dst. */
17377 if (rtx_equal_p (dst, src1))
17378 return false;
17379 if (rtx_equal_p (dst, src2))
17380 return true;
17382 /* Next highest priority is that immediate constants come second. */
17383 if (immediate_operand (src2, mode))
17384 return false;
17385 if (immediate_operand (src1, mode))
17386 return true;
17388 /* Lowest priority is that memory references should come second. */
17389 if (MEM_P (src2))
17390 return false;
17391 if (MEM_P (src1))
17392 return true;
17394 return false;
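/* Example of the priorities above (a sketch): for a commutative PLUS with
   operands {x, 5, x}, dst does not match src1 but does match src2, so
   true is returned and the caller swaps the sources to {x, x, 5}; the
   two-address form "x += 5" then needs no extra copy.  */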
17398 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
17399 destination to use for the operation. If different from the true
17400 destination in operands[0], a copy operation will be required. */
17403 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
17404 rtx operands[])
17406 rtx dst = operands[0];
17407 rtx src1 = operands[1];
17408 rtx src2 = operands[2];
17410 /* Canonicalize operand order. */
17411 if (ix86_swap_binary_operands_p (code, mode, operands))
17413 rtx temp;
17415 /* It is invalid to swap operands of different modes. */
17416 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
17418 temp = src1;
17419 src1 = src2;
17420 src2 = temp;
17423 /* Both source operands cannot be in memory. */
17424 if (MEM_P (src1) && MEM_P (src2))
17426 /* Optimization: Only read from memory once. */
17427 if (rtx_equal_p (src1, src2))
17429 src2 = force_reg (mode, src2);
17430 src1 = src2;
17432 else if (rtx_equal_p (dst, src1))
17433 src2 = force_reg (mode, src2);
17434 else
17435 src1 = force_reg (mode, src1);
17438 /* If the destination is memory, and we do not have matching source
17439 operands, do things in registers. */
17440 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17441 dst = gen_reg_rtx (mode);
17443 /* Source 1 cannot be a constant. */
17444 if (CONSTANT_P (src1))
17445 src1 = force_reg (mode, src1);
17447 /* Source 1 cannot be a non-matching memory. */
17448 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17449 src1 = force_reg (mode, src1);
17451 /* Improve address combine. */
17452 if (code == PLUS
17453 && GET_MODE_CLASS (mode) == MODE_INT
17454 && MEM_P (src2))
17455 src2 = force_reg (mode, src2);
17457 operands[1] = src1;
17458 operands[2] = src2;
17459 return dst;
17462 /* Similarly, but assume that the destination has already been
17463 set up properly. */
17465 void
17466 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
17467 enum machine_mode mode, rtx operands[])
17469 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
17470 gcc_assert (dst == operands[0]);
17473 /* Attempt to expand a binary operator. Make the expansion closer to the
17474 actual machine than just general_operand, which would allow 3 separate
17475 memory references (one output, two input) in a single insn. */
17477 void
17478 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
17479 rtx operands[])
17481 rtx src1, src2, dst, op, clob;
17483 dst = ix86_fixup_binary_operands (code, mode, operands);
17484 src1 = operands[1];
17485 src2 = operands[2];
17487 /* Emit the instruction. */
17489 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
17490 if (reload_in_progress)
17492 /* Reload doesn't know about the flags register, and doesn't know that
17493 it doesn't want to clobber it. We can only do this with PLUS. */
17494 gcc_assert (code == PLUS);
17495 emit_insn (op);
17497 else if (reload_completed
17498 && code == PLUS
17499 && !rtx_equal_p (dst, src1))
17501 /* This is going to be an LEA; avoid splitting it later. */
17502 emit_insn (op);
17504 else
17506 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17507 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
17510 /* Fix up the destination if needed. */
17511 if (dst != operands[0])
17512 emit_move_insn (operands[0], dst);
17515 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
17516 the given OPERANDS. */
17518 void
17519 ix86_expand_vector_logical_operator (enum rtx_code code, enum machine_mode mode,
17520 rtx operands[])
17522 rtx op1 = NULL_RTX, op2 = NULL_RTX;
17523 if (GET_CODE (operands[1]) == SUBREG)
17525 op1 = operands[1];
17526 op2 = operands[2];
17528 else if (GET_CODE (operands[2]) == SUBREG)
17530 op1 = operands[2];
17531 op2 = operands[1];
17533 /* Optimize (__m128i) d | (__m128i) e and similar code,
17534 when d and e are float vectors, into a float vector logical
17535 insn. In C/C++, without using intrinsics, there is no other way
17536 to express a vector logical operation on float vectors than
17537 to cast them temporarily to integer vectors. */
17538 if (op1
17539 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17540 && ((GET_CODE (op2) == SUBREG || GET_CODE (op2) == CONST_VECTOR))
17541 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
17542 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
17543 && SUBREG_BYTE (op1) == 0
17544 && (GET_CODE (op2) == CONST_VECTOR
17545 || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
17546 && SUBREG_BYTE (op2) == 0))
17547 && can_create_pseudo_p ())
17549 rtx dst;
17550 switch (GET_MODE (SUBREG_REG (op1)))
17552 case V4SFmode:
17553 case V8SFmode:
17554 case V2DFmode:
17555 case V4DFmode:
17556 dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
17557 if (GET_CODE (op2) == CONST_VECTOR)
17559 op2 = gen_lowpart (GET_MODE (dst), op2);
17560 op2 = force_reg (GET_MODE (dst), op2);
17562 else
17564 op1 = operands[1];
17565 op2 = SUBREG_REG (operands[2]);
17566 if (!nonimmediate_operand (op2, GET_MODE (dst)))
17567 op2 = force_reg (GET_MODE (dst), op2);
17569 op1 = SUBREG_REG (op1);
17570 if (!nonimmediate_operand (op1, GET_MODE (dst)))
17571 op1 = force_reg (GET_MODE (dst), op1);
17572 emit_insn (gen_rtx_SET (VOIDmode, dst,
17573 gen_rtx_fmt_ee (code, GET_MODE (dst),
17574 op1, op2)));
17575 emit_move_insn (operands[0], gen_lowpart (mode, dst));
17576 return;
17577 default:
17578 break;
17581 if (!nonimmediate_operand (operands[1], mode))
17582 operands[1] = force_reg (mode, operands[1]);
17583 if (!nonimmediate_operand (operands[2], mode))
17584 operands[2] = force_reg (mode, operands[2]);
17585 ix86_fixup_binary_operands_no_copy (code, mode, operands);
17586 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
17587 gen_rtx_fmt_ee (code, mode, operands[1],
17588 operands[2])));
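/* The source pattern this recognizes, sketched with GNU vector extensions
   (illustrative only; the typedef and function names are made up):

       typedef float v4sf __attribute__ ((vector_size (16)));
       typedef long long v2di __attribute__ ((vector_size (16)));

       v4sf
       ior_ps (v4sf a, v4sf b)
       {
         return (v4sf) ((v2di) a | (v2di) b);
       }

   With the rewrite above the IOR can be done as a float-domain logical
   insn such as orps instead of moving the values to the integer domain.  */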
17591 /* Return TRUE or FALSE depending on whether the binary operator meets the
17592 appropriate constraints. */
17594 bool
17595 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
17596 rtx operands[3])
17598 rtx dst = operands[0];
17599 rtx src1 = operands[1];
17600 rtx src2 = operands[2];
17602 /* Both source operands cannot be in memory. */
17603 if (MEM_P (src1) && MEM_P (src2))
17604 return false;
17606 /* Canonicalize operand order for commutative operators. */
17607 if (ix86_swap_binary_operands_p (code, mode, operands))
17609 rtx temp = src1;
17610 src1 = src2;
17611 src2 = temp;
17614 /* If the destination is memory, we must have a matching source operand. */
17615 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17616 return false;
17618 /* Source 1 cannot be a constant. */
17619 if (CONSTANT_P (src1))
17620 return false;
17622 /* Source 1 cannot be a non-matching memory. */
17623 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17624 /* Support "andhi/andsi/anddi" as a zero-extending move. */
17625 return (code == AND
17626 && (mode == HImode
17627 || mode == SImode
17628 || (TARGET_64BIT && mode == DImode))
17629 && satisfies_constraint_L (src2));
17631 return true;
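/* Example of the AND special case above (a sketch): an SImode
   "reg = mem & 0xff" with non-matching operands is still accepted,
   because it can be emitted as a zero-extending byte load rather than a
   read-modify-write and.  */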
17634 /* Attempt to expand a unary operator. Make the expansion closer to the
17635 actual machine than just general_operand, which would allow 2 separate
17636 memory references (one output, one input) in a single insn. */
17638 void
17639 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
17640 rtx operands[])
17642 int matching_memory;
17643 rtx src, dst, op, clob;
17645 dst = operands[0];
17646 src = operands[1];
17648 /* If the destination is memory, and we do not have matching source
17649 operands, do things in registers. */
17650 matching_memory = 0;
17651 if (MEM_P (dst))
17653 if (rtx_equal_p (dst, src))
17654 matching_memory = 1;
17655 else
17656 dst = gen_reg_rtx (mode);
17659 /* When source operand is memory, destination must match. */
17660 if (MEM_P (src) && !matching_memory)
17661 src = force_reg (mode, src);
17663 /* Emit the instruction. */
17665 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
17666 if (reload_in_progress || code == NOT)
17668 /* Reload doesn't know about the flags register, and doesn't know that
17669 it doesn't want to clobber it. */
17670 gcc_assert (code == NOT);
17671 emit_insn (op);
17673 else
17675 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17676 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
17679 /* Fix up the destination if needed. */
17680 if (dst != operands[0])
17681 emit_move_insn (operands[0], dst);
17684 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
17685 divisor are within the range [0-255]. */
17687 void
17688 ix86_split_idivmod (enum machine_mode mode, rtx operands[],
17689 bool signed_p)
17691 rtx_code_label *end_label, *qimode_label;
17692 rtx insn, div, mod;
17693 rtx scratch, tmp0, tmp1, tmp2;
17694 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
17695 rtx (*gen_zero_extend) (rtx, rtx);
17696 rtx (*gen_test_ccno_1) (rtx, rtx);
17698 switch (mode)
17700 case SImode:
17701 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
17702 gen_test_ccno_1 = gen_testsi_ccno_1;
17703 gen_zero_extend = gen_zero_extendqisi2;
17704 break;
17705 case DImode:
17706 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
17707 gen_test_ccno_1 = gen_testdi_ccno_1;
17708 gen_zero_extend = gen_zero_extendqidi2;
17709 break;
17710 default:
17711 gcc_unreachable ();
17714 end_label = gen_label_rtx ();
17715 qimode_label = gen_label_rtx ();
17717 scratch = gen_reg_rtx (mode);
17719 /* Use 8bit unsigned divmod if dividend and divisor are within
17720 the range [0-255]. */
17721 emit_move_insn (scratch, operands[2]);
17722 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
17723 scratch, 1, OPTAB_DIRECT);
17724 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
17725 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
17726 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
17727 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
17728 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
17729 pc_rtx);
17730 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
17731 predict_jump (REG_BR_PROB_BASE * 50 / 100);
17732 JUMP_LABEL (insn) = qimode_label;
17734 /* Generate original signed/unsigned divmod. */
17735 div = gen_divmod4_1 (operands[0], operands[1],
17736 operands[2], operands[3]);
17737 emit_insn (div);
17739 /* Branch to the end. */
17740 emit_jump_insn (gen_jump (end_label));
17741 emit_barrier ();
17743 /* Generate 8bit unsigned divide. */
17744 emit_label (qimode_label);
17745 /* Don't use operands[0] for result of 8bit divide since not all
17746 registers support QImode ZERO_EXTRACT. */
17747 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
17748 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
17749 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
17750 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
17752 if (signed_p)
17754 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
17755 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
17757 else
17759 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
17760 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
17763 /* Extract remainder from AH. */
17764 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
17765 if (REG_P (operands[1]))
17766 insn = emit_move_insn (operands[1], tmp1);
17767 else
17769 /* Need a new scratch register since the old one holds the
17770 result of the 8bit divide. */
17771 scratch = gen_reg_rtx (mode);
17772 emit_move_insn (scratch, tmp1);
17773 insn = emit_move_insn (operands[1], scratch);
17775 set_unique_reg_note (insn, REG_EQUAL, mod);
17777 /* Zero extend quotient from AL. */
17778 tmp1 = gen_lowpart (QImode, tmp0);
17779 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
17780 set_unique_reg_note (insn, REG_EQUAL, div);
17782 emit_label (end_label);
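/* A plain C sketch of the dispatch implemented above (illustrative only;
   the variable names are hypothetical):

       if (((dividend | divisor) & ~0xff) == 0)
         {
           quotient  = (unsigned char) dividend / (unsigned char) divisor;
           remainder = (unsigned char) dividend % (unsigned char) divisor;
         }
       else
         {
           quotient  = dividend / divisor;
           remainder = dividend % divisor;
         }

   The fast path is a single "divb", with the quotient in AL and the
   remainder in AH.  */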
17785 /* Whether it is OK to emit CFI directives when emitting asm code. */
17787 bool
17788 ix86_emit_cfi ()
17790 return dwarf2out_do_cfi_asm ();
17793 #define LEA_MAX_STALL (3)
17794 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
17796 /* Increase given DISTANCE in half-cycles according to
17797 dependencies between PREV and NEXT instructions.
17798 Add 1 half-cycle if there is no dependency and
17799 go to the next cycle if there is some dependency. */
17801 static unsigned int
17802 increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
17804 df_ref def, use;
17806 if (!prev || !next)
17807 return distance + (distance & 1) + 2;
17809 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
17810 return distance + 1;
17812 FOR_EACH_INSN_USE (use, next)
17813 FOR_EACH_INSN_DEF (def, prev)
17814 if (!DF_REF_IS_ARTIFICIAL (def)
17815 && DF_REF_REGNO (use) == DF_REF_REGNO (def))
17816 return distance + (distance & 1) + 2;
17818 return distance + 1;
17821 /* Function checks if instruction INSN defines register number
17822 REGNO1 or REGNO2. */
17824 static bool
17825 insn_defines_reg (unsigned int regno1, unsigned int regno2,
17826 rtx insn)
17828 df_ref def;
17830 FOR_EACH_INSN_DEF (def, insn)
17831 if (DF_REF_REG_DEF_P (def)
17832 && !DF_REF_IS_ARTIFICIAL (def)
17833 && (regno1 == DF_REF_REGNO (def)
17834 || regno2 == DF_REF_REGNO (def)))
17835 return true;
17837 return false;
17840 /* Function checks if instruction INSN uses register number
17841 REGNO as a part of address expression. */
17843 static bool
17844 insn_uses_reg_mem (unsigned int regno, rtx insn)
17846 df_ref use;
17848 FOR_EACH_INSN_USE (use, insn)
17849 if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
17850 return true;
17852 return false;
17855 /* Search backward for non-agu definition of register number REGNO1
17856 or register number REGNO2 in basic block starting from instruction
17857 START up to head of basic block or instruction INSN.
17859 Function puts true value into *FOUND var if definition was found
17860 and false otherwise.
17862 Distance in half-cycles between START and found instruction or head
17863 of BB is added to DISTANCE and returned. */
17865 static int
17866 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
17867 rtx_insn *insn, int distance,
17868 rtx_insn *start, bool *found)
17870 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
17871 rtx_insn *prev = start;
17872 rtx_insn *next = NULL;
17874 *found = false;
17876 while (prev
17877 && prev != insn
17878 && distance < LEA_SEARCH_THRESHOLD)
17880 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
17882 distance = increase_distance (prev, next, distance);
17883 if (insn_defines_reg (regno1, regno2, prev))
17885 if (recog_memoized (prev) < 0
17886 || get_attr_type (prev) != TYPE_LEA)
17888 *found = true;
17889 return distance;
17893 next = prev;
17895 if (prev == BB_HEAD (bb))
17896 break;
17898 prev = PREV_INSN (prev);
17901 return distance;
17904 /* Search backward for non-agu definition of register number REGNO1
17905 or register number REGNO2 in INSN's basic block until
17906 1. Pass LEA_SEARCH_THRESHOLD instructions, or
17907 2. Reach a neighbour BB's boundary, or
17908 3. Reach agu definition.
17909 Returns the distance between the non-agu definition point and INSN.
17910 If no definition point, returns -1. */
17912 static int
17913 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
17914 rtx_insn *insn)
17916 basic_block bb = BLOCK_FOR_INSN (insn);
17917 int distance = 0;
17918 bool found = false;
17920 if (insn != BB_HEAD (bb))
17921 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
17922 distance, PREV_INSN (insn),
17923 &found);
17925 if (!found && distance < LEA_SEARCH_THRESHOLD)
17927 edge e;
17928 edge_iterator ei;
17929 bool simple_loop = false;
17931 FOR_EACH_EDGE (e, ei, bb->preds)
17932 if (e->src == bb)
17934 simple_loop = true;
17935 break;
17938 if (simple_loop)
17939 distance = distance_non_agu_define_in_bb (regno1, regno2,
17940 insn, distance,
17941 BB_END (bb), &found);
17942 else
17944 int shortest_dist = -1;
17945 bool found_in_bb = false;
17947 FOR_EACH_EDGE (e, ei, bb->preds)
17949 int bb_dist
17950 = distance_non_agu_define_in_bb (regno1, regno2,
17951 insn, distance,
17952 BB_END (e->src),
17953 &found_in_bb);
17954 if (found_in_bb)
17956 if (shortest_dist < 0)
17957 shortest_dist = bb_dist;
17958 else if (bb_dist > 0)
17959 shortest_dist = MIN (bb_dist, shortest_dist);
17961 found = true;
17965 distance = shortest_dist;
17969 /* get_attr_type may modify recog data. We want to make sure
17970 that recog data is valid for instruction INSN, on which
17971 distance_non_agu_define is called. INSN is unchanged here. */
17972 extract_insn_cached (insn);
17974 if (!found)
17975 return -1;
17977 return distance >> 1;
17980 /* Return the distance in half-cycles between INSN and the next
17981 insn that uses register number REGNO in a memory address, added
17982 to DISTANCE. Return -1 if REGNO is set.
17984 Put true value into *FOUND if register usage was found and
17985 false otherwise.
17986 Put true value into *REDEFINED if register redefinition was
17987 found and false otherwise. */
17989 static int
17990 distance_agu_use_in_bb (unsigned int regno,
17991 rtx_insn *insn, int distance, rtx_insn *start,
17992 bool *found, bool *redefined)
17994 basic_block bb = NULL;
17995 rtx_insn *next = start;
17996 rtx_insn *prev = NULL;
17998 *found = false;
17999 *redefined = false;
18001 if (start != NULL_RTX)
18003 bb = BLOCK_FOR_INSN (start);
18004 if (start != BB_HEAD (bb))
18005 /* If insn and start belong to the same bb, set prev to insn,
18006 so the call to increase_distance will increase the distance
18007 between insns by 1. */
18008 prev = insn;
18011 while (next
18012 && next != insn
18013 && distance < LEA_SEARCH_THRESHOLD)
18015 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
18017 distance = increase_distance(prev, next, distance);
18018 if (insn_uses_reg_mem (regno, next))
18020 /* Return DISTANCE if OP0 is used in memory
18021 address in NEXT. */
18022 *found = true;
18023 return distance;
18026 if (insn_defines_reg (regno, INVALID_REGNUM, next))
18028 /* Return -1 if OP0 is set in NEXT. */
18029 *redefined = true;
18030 return -1;
18033 prev = next;
18036 if (next == BB_END (bb))
18037 break;
18039 next = NEXT_INSN (next);
18042 return distance;
18045 /* Return the distance between INSN and the next insn that uses
18046 register number REGNO0 in memory address. Return -1 if no such
18047 use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
18049 static int
18050 distance_agu_use (unsigned int regno0, rtx_insn *insn)
18052 basic_block bb = BLOCK_FOR_INSN (insn);
18053 int distance = 0;
18054 bool found = false;
18055 bool redefined = false;
18057 if (insn != BB_END (bb))
18058 distance = distance_agu_use_in_bb (regno0, insn, distance,
18059 NEXT_INSN (insn),
18060 &found, &redefined);
18062 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
18064 edge e;
18065 edge_iterator ei;
18066 bool simple_loop = false;
18068 FOR_EACH_EDGE (e, ei, bb->succs)
18069 if (e->dest == bb)
18071 simple_loop = true;
18072 break;
18075 if (simple_loop)
18076 distance = distance_agu_use_in_bb (regno0, insn,
18077 distance, BB_HEAD (bb),
18078 &found, &redefined);
18079 else
18081 int shortest_dist = -1;
18082 bool found_in_bb = false;
18083 bool redefined_in_bb = false;
18085 FOR_EACH_EDGE (e, ei, bb->succs)
18087 int bb_dist
18088 = distance_agu_use_in_bb (regno0, insn,
18089 distance, BB_HEAD (e->dest),
18090 &found_in_bb, &redefined_in_bb);
18091 if (found_in_bb)
18093 if (shortest_dist < 0)
18094 shortest_dist = bb_dist;
18095 else if (bb_dist > 0)
18096 shortest_dist = MIN (bb_dist, shortest_dist);
18098 found = true;
18102 distance = shortest_dist;
18106 if (!found || redefined)
18107 return -1;
18109 return distance >> 1;
18112 /* Define this macro to tune LEA priority vs ADD; it takes effect when
18113 there is a dilemma of choosing LEA or ADD.
18114 Negative value: ADD is preferred over LEA
18115 Zero: Neutral
18116 Positive value: LEA is preferred over ADD. */
18117 #define IX86_LEA_PRIORITY 0
18119 /* Return true if usage of lea INSN has performance advantage
18120 over a sequence of instructions. Instructions sequence has
18121 SPLIT_COST cycles higher latency than lea latency. */
18123 static bool
18124 ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
18125 unsigned int regno2, int split_cost, bool has_scale)
18127 int dist_define, dist_use;
18129 /* For Silvermont, the use of LEA is justified if it is a 2-source or
18130 3-source LEA used for a non-destructive destination, or if the
18131 ability to use SCALE is wanted. */
18132 if (TARGET_SILVERMONT || TARGET_INTEL)
18134 if (has_scale)
18135 return true;
18136 if (split_cost < 1)
18137 return false;
18138 if (regno0 == regno1 || regno0 == regno2)
18139 return false;
18140 return true;
18143 dist_define = distance_non_agu_define (regno1, regno2, insn);
18144 dist_use = distance_agu_use (regno0, insn);
18146 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
18148 /* If there is no non-AGU operand definition, no AGU
18149 operand usage and the split cost is 0, then both the lea
18150 and non-lea variants have the same priority. Currently
18151 we prefer lea for 64-bit code and non-lea for 32-bit
18152 code. */
18153 if (dist_use < 0 && split_cost == 0)
18154 return TARGET_64BIT || IX86_LEA_PRIORITY;
18155 else
18156 return true;
18159 /* The longer the definition distance, the more preferable lea is.
18160 Here we adjust it to take into account the splitting cost and
18161 lea priority. */
18162 dist_define += split_cost + IX86_LEA_PRIORITY;
18164 /* If there is no use in a memory address then we just check
18165 that the split cost exceeds the AGU stall. */
18166 if (dist_use < 0)
18167 return dist_define > LEA_MAX_STALL;
18169 /* If this insn has both backward non-agu dependence and forward
18170 agu dependence, the one with short distance takes effect. */
18171 return dist_define >= dist_use;
18174 /* Return true if it is legal to clobber flags by INSN and
18175 false otherwise. */
18177 static bool
18178 ix86_ok_to_clobber_flags (rtx_insn *insn)
18180 basic_block bb = BLOCK_FOR_INSN (insn);
18181 df_ref use;
18182 bitmap live;
18184 while (insn)
18186 if (NONDEBUG_INSN_P (insn))
18188 FOR_EACH_INSN_USE (use, insn)
18189 if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG)
18190 return false;
18192 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
18193 return true;
18196 if (insn == BB_END (bb))
18197 break;
18199 insn = NEXT_INSN (insn);
18202 live = df_get_live_out(bb);
18203 return !REGNO_REG_SET_P (live, FLAGS_REG);
18206 /* Return true if we need to split op0 = op1 + op2 into a sequence of
18207 move and add to avoid AGU stalls. */
18209 bool
18210 ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
18212 unsigned int regno0, regno1, regno2;
18214 /* Check if we need to optimize. */
18215 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18216 return false;
18218 /* Check it is correct to split here. */
18219 if (!ix86_ok_to_clobber_flags(insn))
18220 return false;
18222 regno0 = true_regnum (operands[0]);
18223 regno1 = true_regnum (operands[1]);
18224 regno2 = true_regnum (operands[2]);
18226 /* We need to split only adds with a non-destructive
18227 destination operand. */
18228 if (regno0 == regno1 || regno0 == regno2)
18229 return false;
18230 else
18231 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
18234 /* Return true if we should emit lea instruction instead of mov
18235 instruction. */
18237 bool
18238 ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
18240 unsigned int regno0, regno1;
18242 /* Check if we need to optimize. */
18243 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18244 return false;
18246 /* Use lea for reg to reg moves only. */
18247 if (!REG_P (operands[0]) || !REG_P (operands[1]))
18248 return false;
18250 regno0 = true_regnum (operands[0]);
18251 regno1 = true_regnum (operands[1]);
18253 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
18256 /* Return true if we need to split lea into a sequence of
18257 instructions to avoid AGU stalls. */
18259 bool
18260 ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
18262 unsigned int regno0, regno1, regno2;
18263 int split_cost;
18264 struct ix86_address parts;
18265 int ok;
18267 /* Check we need to optimize. */
18268 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
18269 return false;
18271 /* The "at least two components" test below might not catch simple
18272 move or zero extension insns if parts.base is non-NULL and parts.disp
18273 is const0_rtx as the only components in the address, e.g. if the
18274 register is %rbp or %r13. As this test is much cheaper and moves or
18275 zero extensions are the common case, do this check first. */
18276 if (REG_P (operands[1])
18277 || (SImode_address_operand (operands[1], VOIDmode)
18278 && REG_P (XEXP (operands[1], 0))))
18279 return false;
18281 /* Check if it is OK to split here. */
18282 if (!ix86_ok_to_clobber_flags (insn))
18283 return false;
18285 ok = ix86_decompose_address (operands[1], &parts);
18286 gcc_assert (ok);
18288 /* There should be at least two components in the address. */
18289 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
18290 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
18291 return false;
18293 /* We should not split into add if non legitimate pic
18294 operand is used as displacement. */
18295 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
18296 return false;
18298 regno0 = true_regnum (operands[0]) ;
18299 regno1 = INVALID_REGNUM;
18300 regno2 = INVALID_REGNUM;
18302 if (parts.base)
18303 regno1 = true_regnum (parts.base);
18304 if (parts.index)
18305 regno2 = true_regnum (parts.index);
18307 split_cost = 0;
18309 /* Compute how many cycles we will add to execution time
18310 if split lea into a sequence of instructions. */
18311 if (parts.base || parts.index)
18313 /* Have to use mov instruction if non-destructive
18314 destination form is used. */
18315 if (regno1 != regno0 && regno2 != regno0)
18316 split_cost += 1;
18318 /* Have to add index to base if both exist. */
18319 if (parts.base && parts.index)
18320 split_cost += 1;
18322 /* Have to use shift and adds if scale is 2 or greater. */
18323 if (parts.scale > 1)
18325 if (regno0 != regno1)
18326 split_cost += 1;
18327 else if (regno2 == regno0)
18328 split_cost += 4;
18329 else
18330 split_cost += parts.scale;
18333 /* Have to use add instruction with immediate if
18334 disp is nonzero. */
18335 if (parts.disp && parts.disp != const0_rtx)
18336 split_cost += 1;
18338 /* Subtract the price of lea. */
18339 split_cost -= 1;
18342 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
18343 parts.scale > 1);
18346 /* Emit x86 binary operand CODE in mode MODE, where the first operand
18347 matches destination. RTX includes clobber of FLAGS_REG. */
18349 static void
18350 ix86_emit_binop (enum rtx_code code, enum machine_mode mode,
18351 rtx dst, rtx src)
18353 rtx op, clob;
18355 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src));
18356 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18358 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18361 /* Return true if regno1 def is nearest to the insn. */
18363 static bool
18364 find_nearest_reg_def (rtx_insn *insn, int regno1, int regno2)
18366 rtx_insn *prev = insn;
18367 rtx_insn *start = BB_HEAD (BLOCK_FOR_INSN (insn));
18369 if (insn == start)
18370 return false;
18371 while (prev && prev != start)
18373 if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
18375 prev = PREV_INSN (prev);
18376 continue;
18378 if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
18379 return true;
18380 else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
18381 return false;
18382 prev = PREV_INSN (prev);
18385 /* None of the regs is defined in the bb. */
18386 return false;
18389 /* Split lea instructions into a sequence of instructions
18390 which are executed on ALU to avoid AGU stalls.
18391 It is assumed that it is allowed to clobber flags register
18392 at lea position. */
18394 void
18395 ix86_split_lea_for_addr (rtx_insn *insn, rtx operands[], enum machine_mode mode)
18397 unsigned int regno0, regno1, regno2;
18398 struct ix86_address parts;
18399 rtx target, tmp;
18400 int ok, adds;
18402 ok = ix86_decompose_address (operands[1], &parts);
18403 gcc_assert (ok);
18405 target = gen_lowpart (mode, operands[0]);
18407 regno0 = true_regnum (target);
18408 regno1 = INVALID_REGNUM;
18409 regno2 = INVALID_REGNUM;
18411 if (parts.base)
18413 parts.base = gen_lowpart (mode, parts.base);
18414 regno1 = true_regnum (parts.base);
18417 if (parts.index)
18419 parts.index = gen_lowpart (mode, parts.index);
18420 regno2 = true_regnum (parts.index);
18423 if (parts.disp)
18424 parts.disp = gen_lowpart (mode, parts.disp);
18426 if (parts.scale > 1)
18428 /* Case r1 = r1 + ... */
18429 if (regno1 == regno0)
18431 /* If we have a case r1 = r1 + C * r2 then we
18432 should use multiplication which is very
18433 expensive. Assume the cost model is wrong if we
18434 have such a case here. */
18435 gcc_assert (regno2 != regno0);
18437 for (adds = parts.scale; adds > 0; adds--)
18438 ix86_emit_binop (PLUS, mode, target, parts.index);
18440 else
18442 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
18443 if (regno0 != regno2)
18444 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18446 /* Use shift for scaling. */
18447 ix86_emit_binop (ASHIFT, mode, target,
18448 GEN_INT (exact_log2 (parts.scale)));
18450 if (parts.base)
18451 ix86_emit_binop (PLUS, mode, target, parts.base);
18453 if (parts.disp && parts.disp != const0_rtx)
18454 ix86_emit_binop (PLUS, mode, target, parts.disp);
18457 else if (!parts.base && !parts.index)
18459 gcc_assert(parts.disp);
18460 emit_insn (gen_rtx_SET (VOIDmode, target, parts.disp));
18462 else
18464 if (!parts.base)
18466 if (regno0 != regno2)
18467 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18469 else if (!parts.index)
18471 if (regno0 != regno1)
18472 emit_insn (gen_rtx_SET (VOIDmode, target, parts.base));
18474 else
18476 if (regno0 == regno1)
18477 tmp = parts.index;
18478 else if (regno0 == regno2)
18479 tmp = parts.base;
18480 else
18482 rtx tmp1;
18484 /* Find better operand for SET instruction, depending
18485 on which definition is farther from the insn. */
18486 if (find_nearest_reg_def (insn, regno1, regno2))
18487 tmp = parts.index, tmp1 = parts.base;
18488 else
18489 tmp = parts.base, tmp1 = parts.index;
18491 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18493 if (parts.disp && parts.disp != const0_rtx)
18494 ix86_emit_binop (PLUS, mode, target, parts.disp);
18496 ix86_emit_binop (PLUS, mode, target, tmp1);
18497 return;
18500 ix86_emit_binop (PLUS, mode, target, tmp);
18503 if (parts.disp && parts.disp != const0_rtx)
18504 ix86_emit_binop (PLUS, mode, target, parts.disp);
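/* For instance, splitting "leal 8(%ebx,%ecx,4), %eax" with the code above
   yields roughly (illustrative):

       movl  %ecx, %eax
       sall  $2, %eax
       addl  %ebx, %eax
       addl  $8, %eax  */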
18508 /* Return true if it is ok to optimize an ADD operation to LEA
18509 operation to avoid flag register consumption. For most processors,
18510 ADD is faster than LEA. For processors like BONNELL, if the
18511 destination register of LEA holds an actual address which will be
18512 used soon, LEA is better and otherwise ADD is better. */
18514 bool
18515 ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
18517 unsigned int regno0 = true_regnum (operands[0]);
18518 unsigned int regno1 = true_regnum (operands[1]);
18519 unsigned int regno2 = true_regnum (operands[2]);
18521 /* If a = b + c, (a!=b && a!=c), must use lea form. */
18522 if (regno0 != regno1 && regno0 != regno2)
18523 return true;
18525 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18526 return false;
18528 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
18531 /* Return true if destination reg of SET_BODY is shift count of
18532 USE_BODY. */
18534 static bool
18535 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
18537 rtx set_dest;
18538 rtx shift_rtx;
18539 int i;
18541 /* Retrieve destination of SET_BODY. */
18542 switch (GET_CODE (set_body))
18544 case SET:
18545 set_dest = SET_DEST (set_body);
18546 if (!set_dest || !REG_P (set_dest))
18547 return false;
18548 break;
18549 case PARALLEL:
18550 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
18551 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
18552 use_body))
18553 return true;
18554 default:
18555 return false;
18556 break;
18559 /* Retrieve shift count of USE_BODY. */
18560 switch (GET_CODE (use_body))
18562 case SET:
18563 shift_rtx = XEXP (use_body, 1);
18564 break;
18565 case PARALLEL:
18566 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
18567 if (ix86_dep_by_shift_count_body (set_body,
18568 XVECEXP (use_body, 0, i)))
18569 return true;
18570 default:
18571 return false;
18572 break;
18575 if (shift_rtx
18576 && (GET_CODE (shift_rtx) == ASHIFT
18577 || GET_CODE (shift_rtx) == LSHIFTRT
18578 || GET_CODE (shift_rtx) == ASHIFTRT
18579 || GET_CODE (shift_rtx) == ROTATE
18580 || GET_CODE (shift_rtx) == ROTATERT))
18582 rtx shift_count = XEXP (shift_rtx, 1);
18584 /* Return true if shift count is dest of SET_BODY. */
18585 if (REG_P (shift_count))
18587 /* Add this check since it can be invoked before register
18588 allocation in the pre-reload scheduler. */
18589 if (reload_completed
18590 && true_regnum (set_dest) == true_regnum (shift_count))
18591 return true;
18592 else if (REGNO(set_dest) == REGNO(shift_count))
18593 return true;
18597 return false;
18600 /* Return true if destination reg of SET_INSN is shift count of
18601 USE_INSN. */
18603 bool
18604 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
18606 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
18607 PATTERN (use_insn));
18610 /* Return TRUE or FALSE depending on whether the unary operator meets the
18611 appropriate constraints. */
18613 bool
18614 ix86_unary_operator_ok (enum rtx_code,
18615 enum machine_mode,
18616 rtx operands[2])
18618 /* If one of operands is memory, source and destination must match. */
18619 if ((MEM_P (operands[0])
18620 || MEM_P (operands[1]))
18621 && ! rtx_equal_p (operands[0], operands[1]))
18622 return false;
18623 return true;
18626 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
18627 are ok, keeping in mind the possible movddup alternative. */
18629 bool
18630 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
18632 if (MEM_P (operands[0]))
18633 return rtx_equal_p (operands[0], operands[1 + high]);
18634 if (MEM_P (operands[1]) && MEM_P (operands[2]))
18635 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
18636 return true;
18639 /* Post-reload splitter for converting an SF or DFmode value in an
18640 SSE register into an unsigned SImode. */
18642 void
18643 ix86_split_convert_uns_si_sse (rtx operands[])
18645 enum machine_mode vecmode;
18646 rtx value, large, zero_or_two31, input, two31, x;
18648 large = operands[1];
18649 zero_or_two31 = operands[2];
18650 input = operands[3];
18651 two31 = operands[4];
18652 vecmode = GET_MODE (large);
18653 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
18655 /* Load up the value into the low element. We must ensure that the other
18656 elements are valid floats -- zero is the easiest such value. */
18657 if (MEM_P (input))
18659 if (vecmode == V4SFmode)
18660 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
18661 else
18662 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
18664 else
18666 input = gen_rtx_REG (vecmode, REGNO (input));
18667 emit_move_insn (value, CONST0_RTX (vecmode));
18668 if (vecmode == V4SFmode)
18669 emit_insn (gen_sse_movss (value, value, input));
18670 else
18671 emit_insn (gen_sse2_movsd (value, value, input));
18674 emit_move_insn (large, two31);
18675 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
18677 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
18678 emit_insn (gen_rtx_SET (VOIDmode, large, x));
18680 x = gen_rtx_AND (vecmode, zero_or_two31, large);
18681 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
18683 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
18684 emit_insn (gen_rtx_SET (VOIDmode, value, x));
18686 large = gen_rtx_REG (V4SImode, REGNO (large));
18687 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
18689 x = gen_rtx_REG (V4SImode, REGNO (value));
18690 if (vecmode == V4SFmode)
18691 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
18692 else
18693 emit_insn (gen_sse2_cvttpd2dq (x, value));
18694 value = x;
18696 emit_insn (gen_xorv4si3 (value, value, large));
18699 /* Convert an unsigned DImode value into a DFmode, using only SSE.
18700 Expects the 64-bit DImode to be supplied in a pair of integral
18701 registers. Requires SSE2; will use SSE3 if available. For x86_32,
18702 -mfpmath=sse, !optimize_size only. */
18704 void
18705 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
18707 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
18708 rtx int_xmm, fp_xmm;
18709 rtx biases, exponents;
18710 rtx x;
18712 int_xmm = gen_reg_rtx (V4SImode);
18713 if (TARGET_INTER_UNIT_MOVES_TO_VEC)
18714 emit_insn (gen_movdi_to_sse (int_xmm, input));
18715 else if (TARGET_SSE_SPLIT_REGS)
18717 emit_clobber (int_xmm);
18718 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
18720 else
18722 x = gen_reg_rtx (V2DImode);
18723 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
18724 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
18727 x = gen_rtx_CONST_VECTOR (V4SImode,
18728 gen_rtvec (4, GEN_INT (0x43300000UL),
18729 GEN_INT (0x45300000UL),
18730 const0_rtx, const0_rtx));
18731 exponents = validize_mem (force_const_mem (V4SImode, x));
18733 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
18734 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
18736 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
18737 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
18738 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
18739 (0x1.0p84 + double(fp_value_hi_xmm)).
18740 Note these exponents differ by 32. */
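/* As a concrete check: for the input 0x0000000100000005 (hi = 1, lo = 5)
   the two concatenations give 0x1.0p52 + 5.0 and 0x1.0p84 + 1.0 * 0x1.0p32;
   subtracting the biases below and adding the halves yields
   4294967296.0 + 5.0 = 4294967301.0, i.e. (double) 0x100000005.  */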
18742 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
18744 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
18745 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
18746 real_ldexp (&bias_lo_rvt, &dconst1, 52);
18747 real_ldexp (&bias_hi_rvt, &dconst1, 84);
18748 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
18749 x = const_double_from_real_value (bias_hi_rvt, DFmode);
18750 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
18751 biases = validize_mem (force_const_mem (V2DFmode, biases));
18752 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
18754 /* Add the upper and lower DFmode values together. */
18755 if (TARGET_SSE3)
18756 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
18757 else
18759 x = copy_to_mode_reg (V2DFmode, fp_xmm);
18760 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
18761 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
18764 ix86_expand_vector_extract (false, target, fp_xmm, 0);
18767 /* Not used, but eases macroization of patterns. */
18768 void
18769 ix86_expand_convert_uns_sixf_sse (rtx, rtx)
18771 gcc_unreachable ();
18774 /* Convert an unsigned SImode value into a DFmode. Only currently used
18775 for SSE, but applicable anywhere. */
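/* The expansion relies on the identity
   (double) x == (double) (int) (x - 0x80000000) + 0x1.0p31,
   where the subtraction wraps modulo 2**32; e.g. x = 0xffffffff wraps to
   0x7fffffff = 2147483647, and 2147483647.0 + 2147483648.0 = 4294967295.0.  */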
18777 void
18778 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
18780 REAL_VALUE_TYPE TWO31r;
18781 rtx x, fp;
18783 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
18784 NULL, 1, OPTAB_DIRECT);
18786 fp = gen_reg_rtx (DFmode);
18787 emit_insn (gen_floatsidf2 (fp, x));
18789 real_ldexp (&TWO31r, &dconst1, 31);
18790 x = const_double_from_real_value (TWO31r, DFmode);
18792 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
18793 if (x != target)
18794 emit_move_insn (target, x);
18797 /* Convert a signed DImode value into a DFmode. Only used for SSE in
18798 32-bit mode; otherwise we have a direct convert instruction. */
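/* The result is assembled as (double) (signed) hi * 0x1.0p32
   + (double) (unsigned) lo; e.g. for -5 (hi = -1, lo = 0xfffffffb) this
   gives -4294967296.0 + 4294967291.0 = -5.0.  */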
18800 void
18801 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
18803 REAL_VALUE_TYPE TWO32r;
18804 rtx fp_lo, fp_hi, x;
18806 fp_lo = gen_reg_rtx (DFmode);
18807 fp_hi = gen_reg_rtx (DFmode);
18809 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
18811 real_ldexp (&TWO32r, &dconst1, 32);
18812 x = const_double_from_real_value (TWO32r, DFmode);
18813 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
18815 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
18817 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
18818 0, OPTAB_DIRECT);
18819 if (x != target)
18820 emit_move_insn (target, x);
18823 /* Convert an unsigned SImode value into a SFmode, using only SSE.
18824 For x86_32, -mfpmath=sse, !optimize_size only. */
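/* The input is split as x = (x >> 16) * 0x10000 + (x & 0xffff); both halves
   and the scaling convert exactly to SFmode, so only the final addition
   rounds.  E.g. for x = 0xffffffff this computes 4294901760.0f + 65535.0f,
   which rounds to 4294967296.0f, the SFmode value nearest to x.  */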
18825 void
18826 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
18828 REAL_VALUE_TYPE ONE16r;
18829 rtx fp_hi, fp_lo, int_hi, int_lo, x;
18831 real_ldexp (&ONE16r, &dconst1, 16);
18832 x = const_double_from_real_value (ONE16r, SFmode);
18833 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
18834 NULL, 0, OPTAB_DIRECT);
18835 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
18836 NULL, 0, OPTAB_DIRECT);
18837 fp_hi = gen_reg_rtx (SFmode);
18838 fp_lo = gen_reg_rtx (SFmode);
18839 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
18840 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
18841 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
18842 0, OPTAB_DIRECT);
18843 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
18844 0, OPTAB_DIRECT);
18845 if (!rtx_equal_p (target, fp_hi))
18846 emit_move_insn (target, fp_hi);
18849 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
18850 a vector of unsigned ints VAL to vector of floats TARGET. */
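/* This is the same 16-bit split used by ix86_expand_convert_uns_sisf_sse
   above, applied elementwise: val = (val >> 16) * 0x1.0p16 + (val & 0xffff),
   evaluated in the float domain.  */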
18852 void
18853 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
18855 rtx tmp[8];
18856 REAL_VALUE_TYPE TWO16r;
18857 enum machine_mode intmode = GET_MODE (val);
18858 enum machine_mode fltmode = GET_MODE (target);
18859 rtx (*cvt) (rtx, rtx);
18861 if (intmode == V4SImode)
18862 cvt = gen_floatv4siv4sf2;
18863 else
18864 cvt = gen_floatv8siv8sf2;
18865 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
18866 tmp[0] = force_reg (intmode, tmp[0]);
18867 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
18868 OPTAB_DIRECT);
18869 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
18870 NULL_RTX, 1, OPTAB_DIRECT);
18871 tmp[3] = gen_reg_rtx (fltmode);
18872 emit_insn (cvt (tmp[3], tmp[1]));
18873 tmp[4] = gen_reg_rtx (fltmode);
18874 emit_insn (cvt (tmp[4], tmp[2]));
18875 real_ldexp (&TWO16r, &dconst1, 16);
18876 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
18877 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
18878 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
18879 OPTAB_DIRECT);
18880 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
18881 OPTAB_DIRECT);
18882 if (tmp[7] != target)
18883 emit_move_insn (target, tmp[7]);
18886 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
18887 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
18888 This is done by doing just signed conversion if < 0x1p31, and otherwise by
18889 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
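/* For example, converting 3.0e9 (which is >= 0x1p31): VAL is adjusted to
   3.0e9 - 0x1p31 = 852516352.0, the signed fix_trunc then produces
   852516352, and xoring with the 0x80000000 recorded in *XORP restores
   3000000000.  Values below 0x1p31 are left untouched and *XORP is 0.  */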
18891 rtx
18892 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
18894 REAL_VALUE_TYPE TWO31r;
18895 rtx two31r, tmp[4];
18896 enum machine_mode mode = GET_MODE (val);
18897 enum machine_mode scalarmode = GET_MODE_INNER (mode);
18898 enum machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
18899 rtx (*cmp) (rtx, rtx, rtx, rtx);
18900 int i;
18902 for (i = 0; i < 3; i++)
18903 tmp[i] = gen_reg_rtx (mode);
18904 real_ldexp (&TWO31r, &dconst1, 31);
18905 two31r = const_double_from_real_value (TWO31r, scalarmode);
18906 two31r = ix86_build_const_vector (mode, 1, two31r);
18907 two31r = force_reg (mode, two31r);
18908 switch (mode)
18910 case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
18911 case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
18912 case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
18913 case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
18914 default: gcc_unreachable ();
18916 tmp[3] = gen_rtx_LE (mode, two31r, val);
18917 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
18918 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
18919 0, OPTAB_DIRECT);
18920 if (intmode == V4SImode || TARGET_AVX2)
18921 *xorp = expand_simple_binop (intmode, ASHIFT,
18922 gen_lowpart (intmode, tmp[0]),
18923 GEN_INT (31), NULL_RTX, 0,
18924 OPTAB_DIRECT);
18925 else
18927 rtx two31 = GEN_INT ((unsigned HOST_WIDE_INT) 1 << 31);
18928 two31 = ix86_build_const_vector (intmode, 1, two31);
18929 *xorp = expand_simple_binop (intmode, AND,
18930 gen_lowpart (intmode, tmp[0]),
18931 two31, NULL_RTX, 0,
18932 OPTAB_DIRECT);
18934 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
18935 0, OPTAB_DIRECT);
18938 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
18939 then replicate the value for all elements of the vector
18940 register. */
18942 rtx
18943 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
18945 int i, n_elt;
18946 rtvec v;
18947 enum machine_mode scalar_mode;
18949 switch (mode)
18951 case V64QImode:
18952 case V32QImode:
18953 case V16QImode:
18954 case V32HImode:
18955 case V16HImode:
18956 case V8HImode:
18957 case V16SImode:
18958 case V8SImode:
18959 case V4SImode:
18960 case V8DImode:
18961 case V4DImode:
18962 case V2DImode:
18963 gcc_assert (vect);
18964 case V16SFmode:
18965 case V8SFmode:
18966 case V4SFmode:
18967 case V8DFmode:
18968 case V4DFmode:
18969 case V2DFmode:
18970 n_elt = GET_MODE_NUNITS (mode);
18971 v = rtvec_alloc (n_elt);
18972 scalar_mode = GET_MODE_INNER (mode);
18974 RTVEC_ELT (v, 0) = value;
18976 for (i = 1; i < n_elt; ++i)
18977 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
18979 return gen_rtx_CONST_VECTOR (mode, v);
18981 default:
18982 gcc_unreachable ();
18986 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
18987 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
18988 for an SSE register. If VECT is true, then replicate the mask for
18989 all elements of the vector register. If INVERT is true, then create
18990 a mask excluding the sign bit. */
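/* E.g. for V4SFmode this yields a vector of 0x80000000 words (or 0x7fffffff
   when INVERT is true), and for V2DFmode 0x8000000000000000 resp.
   0x7fffffffffffffff; the callers AND with the inverted mask for abs and
   XOR with the normal mask for neg.  */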
18992 rtx
18993 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
18995 enum machine_mode vec_mode, imode;
18996 HOST_WIDE_INT hi, lo;
18997 int shift = 63;
18998 rtx v;
18999 rtx mask;
19001 /* Find the sign bit, sign extended to 2*HWI. */
19002 switch (mode)
19004 case V16SImode:
19005 case V16SFmode:
19006 case V8SImode:
19007 case V4SImode:
19008 case V8SFmode:
19009 case V4SFmode:
19010 vec_mode = mode;
19011 mode = GET_MODE_INNER (mode);
19012 imode = SImode;
19013 lo = 0x80000000, hi = lo < 0;
19014 break;
19016 case V8DImode:
19017 case V4DImode:
19018 case V2DImode:
19019 case V8DFmode:
19020 case V4DFmode:
19021 case V2DFmode:
19022 vec_mode = mode;
19023 mode = GET_MODE_INNER (mode);
19024 imode = DImode;
19025 if (HOST_BITS_PER_WIDE_INT >= 64)
19026 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
19027 else
19028 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
19029 break;
19031 case TImode:
19032 case TFmode:
19033 vec_mode = VOIDmode;
19034 if (HOST_BITS_PER_WIDE_INT >= 64)
19036 imode = TImode;
19037 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
19039 else
19041 rtvec vec;
19043 imode = DImode;
19044 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
19046 if (invert)
19048 lo = ~lo, hi = ~hi;
19049 v = constm1_rtx;
19051 else
19052 v = const0_rtx;
19054 mask = immed_double_const (lo, hi, imode);
19056 vec = gen_rtvec (2, v, mask);
19057 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
19058 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
19060 return v;
19062 break;
19064 default:
19065 gcc_unreachable ();
19068 if (invert)
19069 lo = ~lo, hi = ~hi;
19071 /* Force this value into the low part of a fp vector constant. */
19072 mask = immed_double_const (lo, hi, imode);
19073 mask = gen_lowpart (mode, mask);
19075 if (vec_mode == VOIDmode)
19076 return force_reg (mode, mask);
19078 v = ix86_build_const_vector (vec_mode, vect, mask);
19079 return force_reg (vec_mode, v);
19082 /* Generate code for floating point ABS or NEG. */
19084 void
19085 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
19086 rtx operands[])
19088 rtx mask, set, dst, src;
19089 bool use_sse = false;
19090 bool vector_mode = VECTOR_MODE_P (mode);
19091 enum machine_mode vmode = mode;
19093 if (vector_mode)
19094 use_sse = true;
19095 else if (mode == TFmode)
19096 use_sse = true;
19097 else if (TARGET_SSE_MATH)
19099 use_sse = SSE_FLOAT_MODE_P (mode);
19100 if (mode == SFmode)
19101 vmode = V4SFmode;
19102 else if (mode == DFmode)
19103 vmode = V2DFmode;
19106 /* NEG and ABS performed with SSE use bitwise mask operations.
19107 Create the appropriate mask now. */
19108 if (use_sse)
19109 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
19110 else
19111 mask = NULL_RTX;
19113 dst = operands[0];
19114 src = operands[1];
19116 set = gen_rtx_fmt_e (code, mode, src);
19117 set = gen_rtx_SET (VOIDmode, dst, set);
19119 if (mask)
19121 rtx use, clob;
19122 rtvec par;
19124 use = gen_rtx_USE (VOIDmode, mask);
19125 if (vector_mode)
19126 par = gen_rtvec (2, set, use);
19127 else
19129 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19130 par = gen_rtvec (3, set, use, clob);
19132 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
19134 else
19135 emit_insn (set);
19138 /* Expand a copysign operation. Special case operand 0 being a constant. */
19140 void
19141 ix86_expand_copysign (rtx operands[])
19143 enum machine_mode mode, vmode;
19144 rtx dest, op0, op1, mask, nmask;
19146 dest = operands[0];
19147 op0 = operands[1];
19148 op1 = operands[2];
19150 mode = GET_MODE (dest);
19152 if (mode == SFmode)
19153 vmode = V4SFmode;
19154 else if (mode == DFmode)
19155 vmode = V2DFmode;
19156 else
19157 vmode = mode;
19159 if (GET_CODE (op0) == CONST_DOUBLE)
19161 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
19163 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
19164 op0 = simplify_unary_operation (ABS, mode, op0, mode);
19166 if (mode == SFmode || mode == DFmode)
19168 if (op0 == CONST0_RTX (mode))
19169 op0 = CONST0_RTX (vmode);
19170 else
19172 rtx v = ix86_build_const_vector (vmode, false, op0);
19174 op0 = force_reg (vmode, v);
19177 else if (op0 != CONST0_RTX (mode))
19178 op0 = force_reg (mode, op0);
19180 mask = ix86_build_signbit_mask (vmode, 0, 0);
19182 if (mode == SFmode)
19183 copysign_insn = gen_copysignsf3_const;
19184 else if (mode == DFmode)
19185 copysign_insn = gen_copysigndf3_const;
19186 else
19187 copysign_insn = gen_copysigntf3_const;
19189 emit_insn (copysign_insn (dest, op0, op1, mask));
19191 else
19193 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
19195 nmask = ix86_build_signbit_mask (vmode, 0, 1);
19196 mask = ix86_build_signbit_mask (vmode, 0, 0);
19198 if (mode == SFmode)
19199 copysign_insn = gen_copysignsf3_var;
19200 else if (mode == DFmode)
19201 copysign_insn = gen_copysigndf3_var;
19202 else
19203 copysign_insn = gen_copysigntf3_var;
19205 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
19209 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
19210 be a constant, and so has already been expanded into a vector constant. */
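/* The split below keeps only the sign bit of the value sitting in DEST
   (the operand supplying the sign) and ORs in the absolute value of the
   constant, i.e. the bit pattern of the result is
   (bits of fabs (c)) | (bits of x & sign-bit mask).  */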
19212 void
19213 ix86_split_copysign_const (rtx operands[])
19215 enum machine_mode mode, vmode;
19216 rtx dest, op0, mask, x;
19218 dest = operands[0];
19219 op0 = operands[1];
19220 mask = operands[3];
19222 mode = GET_MODE (dest);
19223 vmode = GET_MODE (mask);
19225 dest = simplify_gen_subreg (vmode, dest, mode, 0);
19226 x = gen_rtx_AND (vmode, dest, mask);
19227 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19229 if (op0 != CONST0_RTX (vmode))
19231 x = gen_rtx_IOR (vmode, dest, op0);
19232 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19236 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
19237 so we have to do two masks. */
19239 void
19240 ix86_split_copysign_var (rtx operands[])
19242 enum machine_mode mode, vmode;
19243 rtx dest, scratch, op0, op1, mask, nmask, x;
19245 dest = operands[0];
19246 scratch = operands[1];
19247 op0 = operands[2];
19248 op1 = operands[3];
19249 nmask = operands[4];
19250 mask = operands[5];
19252 mode = GET_MODE (dest);
19253 vmode = GET_MODE (mask);
19255 if (rtx_equal_p (op0, op1))
19257 /* Shouldn't happen often (it's useless, obviously), but when it does
19258 we'd generate incorrect code if we continue below. */
19259 emit_move_insn (dest, op0);
19260 return;
19263 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
19265 gcc_assert (REGNO (op1) == REGNO (scratch));
19267 x = gen_rtx_AND (vmode, scratch, mask);
19268 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19270 dest = mask;
19271 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19272 x = gen_rtx_NOT (vmode, dest);
19273 x = gen_rtx_AND (vmode, x, op0);
19274 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19276 else
19278 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
19280 x = gen_rtx_AND (vmode, scratch, mask);
19282 else /* alternative 2,4 */
19284 gcc_assert (REGNO (mask) == REGNO (scratch));
19285 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
19286 x = gen_rtx_AND (vmode, scratch, op1);
19288 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19290 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
19292 dest = simplify_gen_subreg (vmode, op0, mode, 0);
19293 x = gen_rtx_AND (vmode, dest, nmask);
19295 else /* alternative 3,4 */
19297 gcc_assert (REGNO (nmask) == REGNO (dest));
19298 dest = nmask;
19299 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19300 x = gen_rtx_AND (vmode, dest, op0);
19302 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19305 x = gen_rtx_IOR (vmode, dest, scratch);
19306 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19309 /* Return TRUE or FALSE depending on whether the first SET in INSN
19310 has source and destination with matching CC modes, and that the
19311 CC mode is at least as constrained as REQ_MODE. */
19313 bool
19314 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
19316 rtx set;
19317 enum machine_mode set_mode;
19319 set = PATTERN (insn);
19320 if (GET_CODE (set) == PARALLEL)
19321 set = XVECEXP (set, 0, 0);
19322 gcc_assert (GET_CODE (set) == SET);
19323 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
19325 set_mode = GET_MODE (SET_DEST (set));
19326 switch (set_mode)
19328 case CCNOmode:
19329 if (req_mode != CCNOmode
19330 && (req_mode != CCmode
19331 || XEXP (SET_SRC (set), 1) != const0_rtx))
19332 return false;
19333 break;
19334 case CCmode:
19335 if (req_mode == CCGCmode)
19336 return false;
19337 /* FALLTHRU */
19338 case CCGCmode:
19339 if (req_mode == CCGOCmode || req_mode == CCNOmode)
19340 return false;
19341 /* FALLTHRU */
19342 case CCGOCmode:
19343 if (req_mode == CCZmode)
19344 return false;
19345 /* FALLTHRU */
19346 case CCZmode:
19347 break;
19349 case CCAmode:
19350 case CCCmode:
19351 case CCOmode:
19352 case CCSmode:
19353 if (set_mode != req_mode)
19354 return false;
19355 break;
19357 default:
19358 gcc_unreachable ();
19361 return GET_MODE (SET_SRC (set)) == set_mode;
19364 /* Generate insn patterns to do an integer compare of OPERANDS. */
19366 static rtx
19367 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
19369 enum machine_mode cmpmode;
19370 rtx tmp, flags;
19372 cmpmode = SELECT_CC_MODE (code, op0, op1);
19373 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
19375 /* This is very simple, but making the interface the same as in the
19376 FP case makes the rest of the code easier. */
19377 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
19378 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
19380 /* Return the test that should be put into the flags user, i.e.
19381 the bcc, scc, or cmov instruction. */
19382 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
19385 /* Figure out whether to use ordered or unordered fp comparisons.
19386 Return the appropriate mode to use. */
19388 enum machine_mode
19389 ix86_fp_compare_mode (enum rtx_code)
19391 /* ??? In order to make all comparisons reversible, we do all comparisons
19392 non-trapping when compiling for IEEE. Once gcc is able to distinguish
19393 between trapping and nontrapping forms of comparisons, we can make inequality
19394 comparisons trapping again, since it results in better code when using
19395 FCOM based compares. */
19396 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
19399 enum machine_mode
19400 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
19402 enum machine_mode mode = GET_MODE (op0);
19404 if (SCALAR_FLOAT_MODE_P (mode))
19406 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
19407 return ix86_fp_compare_mode (code);
19410 switch (code)
19412 /* Only zero flag is needed. */
19413 case EQ: /* ZF=0 */
19414 case NE: /* ZF!=0 */
19415 return CCZmode;
19416 /* Codes needing carry flag. */
19417 case GEU: /* CF=0 */
19418 case LTU: /* CF=1 */
19419 /* Detect overflow checks. They need just the carry flag. */
19420 if (GET_CODE (op0) == PLUS
19421 && rtx_equal_p (op1, XEXP (op0, 0)))
19422 return CCCmode;
19423 else
19424 return CCmode;
19425 case GTU: /* CF=0 & ZF=0 */
19426 case LEU: /* CF=1 | ZF=1 */
19427 return CCmode;
19428 /* Codes possibly doable only with sign flag when
19429 comparing against zero. */
19430 case GE: /* SF=OF or SF=0 */
19431 case LT: /* SF<>OF or SF=1 */
19432 if (op1 == const0_rtx)
19433 return CCGOCmode;
19434 else
19435 /* For other cases Carry flag is not required. */
19436 return CCGCmode;
19437 /* Codes doable only with sign flag when comparing
19438 against zero, but we lack a jump instruction for it,
19439 so we need to use relational tests against overflow,
19440 which thus needs to be zero. */
19441 case GT: /* ZF=0 & SF=OF */
19442 case LE: /* ZF=1 | SF<>OF */
19443 if (op1 == const0_rtx)
19444 return CCNOmode;
19445 else
19446 return CCGCmode;
19447 /* The strcmp pattern does (use flags), and combine may ask us for the
19448 proper mode. */
19449 case USE:
19450 return CCmode;
19451 default:
19452 gcc_unreachable ();
19456 /* Return the fixed registers used for condition codes. */
19458 static bool
19459 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
19461 *p1 = FLAGS_REG;
19462 *p2 = FPSR_REG;
19463 return true;
19466 /* If two condition code modes are compatible, return a condition code
19467 mode which is compatible with both. Otherwise, return
19468 VOIDmode. */
19470 static enum machine_mode
19471 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
19473 if (m1 == m2)
19474 return m1;
19476 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
19477 return VOIDmode;
19479 if ((m1 == CCGCmode && m2 == CCGOCmode)
19480 || (m1 == CCGOCmode && m2 == CCGCmode))
19481 return CCGCmode;
19483 if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
19484 return m2;
19485 else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
19486 return m1;
19488 switch (m1)
19490 default:
19491 gcc_unreachable ();
19493 case CCmode:
19494 case CCGCmode:
19495 case CCGOCmode:
19496 case CCNOmode:
19497 case CCAmode:
19498 case CCCmode:
19499 case CCOmode:
19500 case CCSmode:
19501 case CCZmode:
19502 switch (m2)
19504 default:
19505 return VOIDmode;
19507 case CCmode:
19508 case CCGCmode:
19509 case CCGOCmode:
19510 case CCNOmode:
19511 case CCAmode:
19512 case CCCmode:
19513 case CCOmode:
19514 case CCSmode:
19515 case CCZmode:
19516 return CCmode;
19519 case CCFPmode:
19520 case CCFPUmode:
19521 /* These are only compatible with themselves, which we already
19522 checked above. */
19523 return VOIDmode;
19528 /* Return a comparison we can do and that it is equivalent to
19529 swap_condition (code) apart possibly from orderedness.
19530 But, never change orderedness if TARGET_IEEE_FP, returning
19531 UNKNOWN in that case if necessary. */
19533 static enum rtx_code
19534 ix86_fp_swap_condition (enum rtx_code code)
19536 switch (code)
19538 case GT: /* GTU - CF=0 & ZF=0 */
19539 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
19540 case GE: /* GEU - CF=0 */
19541 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
19542 case UNLT: /* LTU - CF=1 */
19543 return TARGET_IEEE_FP ? UNKNOWN : GT;
19544 case UNLE: /* LEU - CF=1 | ZF=1 */
19545 return TARGET_IEEE_FP ? UNKNOWN : GE;
19546 default:
19547 return swap_condition (code);
19551 /* Return cost of comparison CODE using the best strategy for performance.
19552 All of the following functions use the number of instructions as the cost metric.
19553 In the future this should be tweaked to compute bytes for optimize_size and
19554 take into account performance of various instructions on various CPUs. */
19556 static int
19557 ix86_fp_comparison_cost (enum rtx_code code)
19559 int arith_cost;
19561 /* The cost of code using bit-twiddling on %ah. */
19562 switch (code)
19564 case UNLE:
19565 case UNLT:
19566 case LTGT:
19567 case GT:
19568 case GE:
19569 case UNORDERED:
19570 case ORDERED:
19571 case UNEQ:
19572 arith_cost = 4;
19573 break;
19574 case LT:
19575 case NE:
19576 case EQ:
19577 case UNGE:
19578 arith_cost = TARGET_IEEE_FP ? 5 : 4;
19579 break;
19580 case LE:
19581 case UNGT:
19582 arith_cost = TARGET_IEEE_FP ? 6 : 4;
19583 break;
19584 default:
19585 gcc_unreachable ();
19588 switch (ix86_fp_comparison_strategy (code))
19590 case IX86_FPCMP_COMI:
19591 return arith_cost > 4 ? 3 : 2;
19592 case IX86_FPCMP_SAHF:
19593 return arith_cost > 4 ? 4 : 3;
19594 default:
19595 return arith_cost;
19599 /* Return the strategy to use for floating-point comparisons. We assume fcomi
19600 is always preferable where available, since that is also true when looking at size
19601 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
19603 enum ix86_fpcmp_strategy
19604 ix86_fp_comparison_strategy (enum rtx_code)
19606 /* Do fcomi/sahf based test when profitable. */
19608 if (TARGET_CMOVE)
19609 return IX86_FPCMP_COMI;
19611 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
19612 return IX86_FPCMP_SAHF;
19614 return IX86_FPCMP_ARITH;
19617 /* Swap, force into registers, or otherwise massage the two operands
19618 to a fp comparison. The operands are updated in place; the new
19619 comparison code is returned. */
19621 static enum rtx_code
19622 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
19624 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
19625 rtx op0 = *pop0, op1 = *pop1;
19626 enum machine_mode op_mode = GET_MODE (op0);
19627 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
19629 /* All of the unordered compare instructions only work on registers.
19630 The same is true of the fcomi compare instructions. The XFmode
19631 compare instructions require registers except when comparing
19632 against zero or when converting operand 1 from fixed point to
19633 floating point. */
19635 if (!is_sse
19636 && (fpcmp_mode == CCFPUmode
19637 || (op_mode == XFmode
19638 && ! (standard_80387_constant_p (op0) == 1
19639 || standard_80387_constant_p (op1) == 1)
19640 && GET_CODE (op1) != FLOAT)
19641 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
19643 op0 = force_reg (op_mode, op0);
19644 op1 = force_reg (op_mode, op1);
19646 else
19648 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
19649 things around if they appear profitable, otherwise force op0
19650 into a register. */
19652 if (standard_80387_constant_p (op0) == 0
19653 || (MEM_P (op0)
19654 && ! (standard_80387_constant_p (op1) == 0
19655 || MEM_P (op1))))
19657 enum rtx_code new_code = ix86_fp_swap_condition (code);
19658 if (new_code != UNKNOWN)
19660 rtx tmp;
19661 tmp = op0, op0 = op1, op1 = tmp;
19662 code = new_code;
19666 if (!REG_P (op0))
19667 op0 = force_reg (op_mode, op0);
19669 if (CONSTANT_P (op1))
19671 int tmp = standard_80387_constant_p (op1);
19672 if (tmp == 0)
19673 op1 = validize_mem (force_const_mem (op_mode, op1));
19674 else if (tmp == 1)
19676 if (TARGET_CMOVE)
19677 op1 = force_reg (op_mode, op1);
19679 else
19680 op1 = force_reg (op_mode, op1);
19684 /* Try to rearrange the comparison to make it cheaper. */
19685 if (ix86_fp_comparison_cost (code)
19686 > ix86_fp_comparison_cost (swap_condition (code))
19687 && (REG_P (op1) || can_create_pseudo_p ()))
19689 rtx tmp;
19690 tmp = op0, op0 = op1, op1 = tmp;
19691 code = swap_condition (code);
19692 if (!REG_P (op0))
19693 op0 = force_reg (op_mode, op0);
19696 *pop0 = op0;
19697 *pop1 = op1;
19698 return code;
19701 /* Convert comparison codes we use to represent FP comparison to integer
19702 code that will result in proper branch. Return UNKNOWN if no such code
19703 is available. */
19705 enum rtx_code
19706 ix86_fp_compare_code_to_integer (enum rtx_code code)
19708 switch (code)
19710 case GT:
19711 return GTU;
19712 case GE:
19713 return GEU;
19714 case ORDERED:
19715 case UNORDERED:
19716 return code;
19717 break;
19718 case UNEQ:
19719 return EQ;
19720 break;
19721 case UNLT:
19722 return LTU;
19723 break;
19724 case UNLE:
19725 return LEU;
19726 break;
19727 case LTGT:
19728 return NE;
19729 break;
19730 default:
19731 return UNKNOWN;
19735 /* Generate insn patterns to do a floating point compare of OPERANDS. */
19737 static rtx
19738 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
19740 enum machine_mode fpcmp_mode, intcmp_mode;
19741 rtx tmp, tmp2;
19743 fpcmp_mode = ix86_fp_compare_mode (code);
19744 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
19746 /* Do fcomi/sahf based test when profitable. */
19747 switch (ix86_fp_comparison_strategy (code))
19749 case IX86_FPCMP_COMI:
19750 intcmp_mode = fpcmp_mode;
19751 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19752 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
19753 tmp);
19754 emit_insn (tmp);
19755 break;
19757 case IX86_FPCMP_SAHF:
19758 intcmp_mode = fpcmp_mode;
19759 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19760 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
19761 tmp);
19763 if (!scratch)
19764 scratch = gen_reg_rtx (HImode);
19765 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
19766 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
19767 break;
19769 case IX86_FPCMP_ARITH:
19770 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
19771 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19772 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
19773 if (!scratch)
19774 scratch = gen_reg_rtx (HImode);
19775 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
19777 /* In the unordered case, we have to check C2 for NaN's, which
19778 doesn't happen to work out to anything nice combination-wise.
19779 So do some bit twiddling on the value we've got in AH to come
19780 up with an appropriate set of condition codes. */
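/* After fnstsw the condition bits land in AH as C0 = 0x01, C2 = 0x04 and
   C3 = 0x40.  fcom sets C3:C2:C0 to 000 for >, 001 for <, 100 for = and
   111 for unordered, which is where the 0x45, 0x44, 0x05, 0x40 and 0x04
   masks used below come from.  */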
19782 intcmp_mode = CCNOmode;
19783 switch (code)
19785 case GT:
19786 case UNGT:
19787 if (code == GT || !TARGET_IEEE_FP)
19789 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
19790 code = EQ;
19792 else
19794 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19795 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
19796 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
19797 intcmp_mode = CCmode;
19798 code = GEU;
19800 break;
19801 case LT:
19802 case UNLT:
19803 if (code == LT && TARGET_IEEE_FP)
19805 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19806 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
19807 intcmp_mode = CCmode;
19808 code = EQ;
19810 else
19812 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
19813 code = NE;
19815 break;
19816 case GE:
19817 case UNGE:
19818 if (code == GE || !TARGET_IEEE_FP)
19820 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
19821 code = EQ;
19823 else
19825 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19826 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
19827 code = NE;
19829 break;
19830 case LE:
19831 case UNLE:
19832 if (code == LE && TARGET_IEEE_FP)
19834 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19835 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
19836 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
19837 intcmp_mode = CCmode;
19838 code = LTU;
19840 else
19842 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
19843 code = NE;
19845 break;
19846 case EQ:
19847 case UNEQ:
19848 if (code == EQ && TARGET_IEEE_FP)
19850 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19851 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
19852 intcmp_mode = CCmode;
19853 code = EQ;
19855 else
19857 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
19858 code = NE;
19860 break;
19861 case NE:
19862 case LTGT:
19863 if (code == NE && TARGET_IEEE_FP)
19865 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19866 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
19867 GEN_INT (0x40)));
19868 code = NE;
19870 else
19872 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
19873 code = EQ;
19875 break;
19877 case UNORDERED:
19878 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
19879 code = NE;
19880 break;
19881 case ORDERED:
19882 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
19883 code = EQ;
19884 break;
19886 default:
19887 gcc_unreachable ();
19889 break;
19891 default:
19892 gcc_unreachable();
19895 /* Return the test that should be put into the flags user, i.e.
19896 the bcc, scc, or cmov instruction. */
19897 return gen_rtx_fmt_ee (code, VOIDmode,
19898 gen_rtx_REG (intcmp_mode, FLAGS_REG),
19899 const0_rtx);
19902 static rtx
19903 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
19905 rtx ret;
19907 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
19908 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
19910 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
19912 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
19913 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
19915 else
19916 ret = ix86_expand_int_compare (code, op0, op1);
19918 return ret;
19921 void
19922 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
19924 enum machine_mode mode = GET_MODE (op0);
19925 rtx tmp;
19927 switch (mode)
19929 case SFmode:
19930 case DFmode:
19931 case XFmode:
19932 case QImode:
19933 case HImode:
19934 case SImode:
19935 simple:
19936 tmp = ix86_expand_compare (code, op0, op1);
19937 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
19938 gen_rtx_LABEL_REF (VOIDmode, label),
19939 pc_rtx);
19940 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
19941 return;
19943 case DImode:
19944 if (TARGET_64BIT)
19945 goto simple;
19946 case TImode:
19947 /* Expand DImode branch into multiple compare+branch. */
19949 rtx lo[2], hi[2];
19950 rtx_code_label *label2;
19951 enum rtx_code code1, code2, code3;
19952 enum machine_mode submode;
19954 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
19956 tmp = op0, op0 = op1, op1 = tmp;
19957 code = swap_condition (code);
19960 split_double_mode (mode, &op0, 1, lo+0, hi+0);
19961 split_double_mode (mode, &op1, 1, lo+1, hi+1);
19963 submode = mode == DImode ? SImode : DImode;
19965 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
19966 avoid two branches. This costs one extra insn, so disable when
19967 optimizing for size. */
19969 if ((code == EQ || code == NE)
19970 && (!optimize_insn_for_size_p ()
19971 || hi[1] == const0_rtx || lo[1] == const0_rtx))
19973 rtx xor0, xor1;
19975 xor1 = hi[0];
19976 if (hi[1] != const0_rtx)
19977 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
19978 NULL_RTX, 0, OPTAB_WIDEN);
19980 xor0 = lo[0];
19981 if (lo[1] != const0_rtx)
19982 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
19983 NULL_RTX, 0, OPTAB_WIDEN);
19985 tmp = expand_binop (submode, ior_optab, xor1, xor0,
19986 NULL_RTX, 0, OPTAB_WIDEN);
19988 ix86_expand_branch (code, tmp, const0_rtx, label);
19989 return;
19992 /* Otherwise, if we are doing less-than or greater-or-equal-than,
19993 op1 is a constant and the low word is zero, then we can just
19994 examine the high word. Similarly for low word -1 and
19995 less-or-equal-than or greater-than. */
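/* E.g. for a DImode test x < 0x500000000 (hi = 5, lo = 0) it is enough to
   branch on hi(x) < 5: when hi(x) == 5 the unsigned low-word test
   lo(x) < 0 can never succeed.  The LE/GT cases with low word -1 are the
   mirror image.  */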
19997 if (CONST_INT_P (hi[1]))
19998 switch (code)
20000 case LT: case LTU: case GE: case GEU:
20001 if (lo[1] == const0_rtx)
20003 ix86_expand_branch (code, hi[0], hi[1], label);
20004 return;
20006 break;
20007 case LE: case LEU: case GT: case GTU:
20008 if (lo[1] == constm1_rtx)
20010 ix86_expand_branch (code, hi[0], hi[1], label);
20011 return;
20013 break;
20014 default:
20015 break;
20018 /* Otherwise, we need two or three jumps. */
20020 label2 = gen_label_rtx ();
20022 code1 = code;
20023 code2 = swap_condition (code);
20024 code3 = unsigned_condition (code);
20026 switch (code)
20028 case LT: case GT: case LTU: case GTU:
20029 break;
20031 case LE: code1 = LT; code2 = GT; break;
20032 case GE: code1 = GT; code2 = LT; break;
20033 case LEU: code1 = LTU; code2 = GTU; break;
20034 case GEU: code1 = GTU; code2 = LTU; break;
20036 case EQ: code1 = UNKNOWN; code2 = NE; break;
20037 case NE: code2 = UNKNOWN; break;
20039 default:
20040 gcc_unreachable ();
20044 * a < b =>
20045 * if (hi(a) < hi(b)) goto true;
20046 * if (hi(a) > hi(b)) goto false;
20047 * if (lo(a) < lo(b)) goto true;
20048 * false:
20051 if (code1 != UNKNOWN)
20052 ix86_expand_branch (code1, hi[0], hi[1], label);
20053 if (code2 != UNKNOWN)
20054 ix86_expand_branch (code2, hi[0], hi[1], label2);
20056 ix86_expand_branch (code3, lo[0], lo[1], label);
20058 if (code2 != UNKNOWN)
20059 emit_label (label2);
20060 return;
20063 default:
20064 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
20065 goto simple;
20069 /* Split branch based on floating point condition. */
20070 void
20071 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
20072 rtx target1, rtx target2, rtx tmp)
20074 rtx condition;
20075 rtx i;
20077 if (target2 != pc_rtx)
20079 rtx tmp = target2;
20080 code = reverse_condition_maybe_unordered (code);
20081 target2 = target1;
20082 target1 = tmp;
20085 condition = ix86_expand_fp_compare (code, op1, op2,
20086 tmp);
20088 i = emit_jump_insn (gen_rtx_SET
20089 (VOIDmode, pc_rtx,
20090 gen_rtx_IF_THEN_ELSE (VOIDmode,
20091 condition, target1, target2)));
20092 if (split_branch_probability >= 0)
20093 add_int_reg_note (i, REG_BR_PROB, split_branch_probability);
20096 void
20097 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
20099 rtx ret;
20101 gcc_assert (GET_MODE (dest) == QImode);
20103 ret = ix86_expand_compare (code, op0, op1);
20104 PUT_MODE (ret, QImode);
20105 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
20108 /* Expand comparison setting or clearing carry flag. Return true when
20109 successful and set pop for the operation. */
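/* E.g. "x == 0" is rewritten below as "(unsigned) x < 1"; the LTU result
   lives entirely in the carry flag, which lets the callers use sbb/adc
   style sequences without a setcc.  */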
20110 static bool
20111 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
20113 enum machine_mode mode =
20114 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
20116 /* Do not handle double-mode compares that go through special path. */
20117 if (mode == (TARGET_64BIT ? TImode : DImode))
20118 return false;
20120 if (SCALAR_FLOAT_MODE_P (mode))
20122 rtx compare_op;
20123 rtx_insn *compare_seq;
20125 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
20127 /* Shortcut: the following common codes never translate
20128 into carry flag compares. */
20129 if (code == EQ || code == NE || code == UNEQ || code == LTGT
20130 || code == ORDERED || code == UNORDERED)
20131 return false;
20133 /* These comparisons require the zero flag; swap operands so they don't. */
20134 if ((code == GT || code == UNLE || code == LE || code == UNGT)
20135 && !TARGET_IEEE_FP)
20137 rtx tmp = op0;
20138 op0 = op1;
20139 op1 = tmp;
20140 code = swap_condition (code);
20143 /* Try to expand the comparison and verify that we end up with
20144 a carry-flag-based comparison. This fails to be true only when
20145 we decide to expand the comparison using arithmetic, which is
20146 not a common scenario. */
20147 start_sequence ();
20148 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20149 compare_seq = get_insns ();
20150 end_sequence ();
20152 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
20153 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
20154 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
20155 else
20156 code = GET_CODE (compare_op);
20158 if (code != LTU && code != GEU)
20159 return false;
20161 emit_insn (compare_seq);
20162 *pop = compare_op;
20163 return true;
20166 if (!INTEGRAL_MODE_P (mode))
20167 return false;
20169 switch (code)
20171 case LTU:
20172 case GEU:
20173 break;
20175 /* Convert a==0 into (unsigned)a<1. */
20176 case EQ:
20177 case NE:
20178 if (op1 != const0_rtx)
20179 return false;
20180 op1 = const1_rtx;
20181 code = (code == EQ ? LTU : GEU);
20182 break;
20184 /* Convert a>b into b<a or a>=b+1. */
20185 case GTU:
20186 case LEU:
20187 if (CONST_INT_P (op1))
20189 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
20190 /* Bail out on overflow. We still can swap operands but that
20191 would force loading of the constant into register. */
20192 if (op1 == const0_rtx
20193 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
20194 return false;
20195 code = (code == GTU ? GEU : LTU);
20197 else
20199 rtx tmp = op1;
20200 op1 = op0;
20201 op0 = tmp;
20202 code = (code == GTU ? LTU : GEU);
20204 break;
20206 /* Convert a>=0 into (unsigned)a<0x80000000. */
20207 case LT:
20208 case GE:
20209 if (mode == DImode || op1 != const0_rtx)
20210 return false;
20211 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20212 code = (code == LT ? GEU : LTU);
20213 break;
20214 case LE:
20215 case GT:
20216 if (mode == DImode || op1 != constm1_rtx)
20217 return false;
20218 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20219 code = (code == LE ? GEU : LTU);
20220 break;
20222 default:
20223 return false;
20225 /* Swapping operands may cause constant to appear as first operand. */
20226 if (!nonimmediate_operand (op0, VOIDmode))
20228 if (!can_create_pseudo_p ())
20229 return false;
20230 op0 = force_reg (mode, op0);
20232 *pop = ix86_expand_compare (code, op0, op1);
20233 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
20234 return true;
20237 bool
20238 ix86_expand_int_movcc (rtx operands[])
20240 enum rtx_code code = GET_CODE (operands[1]), compare_code;
20241 rtx_insn *compare_seq;
20242 rtx compare_op;
20243 enum machine_mode mode = GET_MODE (operands[0]);
20244 bool sign_bit_compare_p = false;
20245 rtx op0 = XEXP (operands[1], 0);
20246 rtx op1 = XEXP (operands[1], 1);
20248 if (GET_MODE (op0) == TImode
20249 || (GET_MODE (op0) == DImode
20250 && !TARGET_64BIT))
20251 return false;
20253 start_sequence ();
20254 compare_op = ix86_expand_compare (code, op0, op1);
20255 compare_seq = get_insns ();
20256 end_sequence ();
20258 compare_code = GET_CODE (compare_op);
20260 if ((op1 == const0_rtx && (code == GE || code == LT))
20261 || (op1 == constm1_rtx && (code == GT || code == LE)))
20262 sign_bit_compare_p = true;
20264 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
20265 HImode insns, we'd be swallowed in word prefix ops. */
20267 if ((mode != HImode || TARGET_FAST_PREFIX)
20268 && (mode != (TARGET_64BIT ? TImode : DImode))
20269 && CONST_INT_P (operands[2])
20270 && CONST_INT_P (operands[3]))
20272 rtx out = operands[0];
20273 HOST_WIDE_INT ct = INTVAL (operands[2]);
20274 HOST_WIDE_INT cf = INTVAL (operands[3]);
20275 HOST_WIDE_INT diff;
20277 diff = ct - cf;
20278 /* Sign bit compares are better done using shifts than by using
20279 sbb. */
20280 if (sign_bit_compare_p
20281 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
20283 /* Detect overlap between destination and compare sources. */
20284 rtx tmp = out;
20286 if (!sign_bit_compare_p)
20288 rtx flags;
20289 bool fpcmp = false;
20291 compare_code = GET_CODE (compare_op);
20293 flags = XEXP (compare_op, 0);
20295 if (GET_MODE (flags) == CCFPmode
20296 || GET_MODE (flags) == CCFPUmode)
20298 fpcmp = true;
20299 compare_code
20300 = ix86_fp_compare_code_to_integer (compare_code);
20303 /* To simplify rest of code, restrict to the GEU case. */
20304 if (compare_code == LTU)
20306 HOST_WIDE_INT tmp = ct;
20307 ct = cf;
20308 cf = tmp;
20309 compare_code = reverse_condition (compare_code);
20310 code = reverse_condition (code);
20312 else
20314 if (fpcmp)
20315 PUT_CODE (compare_op,
20316 reverse_condition_maybe_unordered
20317 (GET_CODE (compare_op)));
20318 else
20319 PUT_CODE (compare_op,
20320 reverse_condition (GET_CODE (compare_op)));
20322 diff = ct - cf;
20324 if (reg_overlap_mentioned_p (out, op0)
20325 || reg_overlap_mentioned_p (out, op1))
20326 tmp = gen_reg_rtx (mode);
20328 if (mode == DImode)
20329 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
20330 else
20331 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
20332 flags, compare_op));
20334 else
20336 if (code == GT || code == GE)
20337 code = reverse_condition (code);
20338 else
20340 HOST_WIDE_INT tmp = ct;
20341 ct = cf;
20342 cf = tmp;
20343 diff = ct - cf;
20345 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
20348 if (diff == 1)
20351 * cmpl op0,op1
20352 * sbbl dest,dest
20353 * [addl dest, ct]
20355 * Size 5 - 8.
20357 if (ct)
20358 tmp = expand_simple_binop (mode, PLUS,
20359 tmp, GEN_INT (ct),
20360 copy_rtx (tmp), 1, OPTAB_DIRECT);
20362 else if (cf == -1)
20365 * cmpl op0,op1
20366 * sbbl dest,dest
20367 * orl $ct, dest
20369 * Size 8.
20371 tmp = expand_simple_binop (mode, IOR,
20372 tmp, GEN_INT (ct),
20373 copy_rtx (tmp), 1, OPTAB_DIRECT);
20375 else if (diff == -1 && ct)
20378 * cmpl op0,op1
20379 * sbbl dest,dest
20380 * notl dest
20381 * [addl dest, cf]
20383 * Size 8 - 11.
20385 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20386 if (cf)
20387 tmp = expand_simple_binop (mode, PLUS,
20388 copy_rtx (tmp), GEN_INT (cf),
20389 copy_rtx (tmp), 1, OPTAB_DIRECT);
20391 else
20394 * cmpl op0,op1
20395 * sbbl dest,dest
20396 * [notl dest]
20397 * andl cf - ct, dest
20398 * [addl dest, ct]
20400 * Size 8 - 11.
20403 if (cf == 0)
20405 cf = ct;
20406 ct = 0;
20407 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20410 tmp = expand_simple_binop (mode, AND,
20411 copy_rtx (tmp),
20412 gen_int_mode (cf - ct, mode),
20413 copy_rtx (tmp), 1, OPTAB_DIRECT);
20414 if (ct)
20415 tmp = expand_simple_binop (mode, PLUS,
20416 copy_rtx (tmp), GEN_INT (ct),
20417 copy_rtx (tmp), 1, OPTAB_DIRECT);
20420 if (!rtx_equal_p (tmp, out))
20421 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
20423 return true;
20426 if (diff < 0)
20428 enum machine_mode cmp_mode = GET_MODE (op0);
20430 HOST_WIDE_INT tmp;
20431 tmp = ct, ct = cf, cf = tmp;
20432 diff = -diff;
20434 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20436 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20438 /* We may be reversing an unordered compare to a normal compare, which
20439 is not valid in general (we may convert a non-trapping condition
20440 into a trapping one); however, on i386 we currently emit all
20441 comparisons unordered. */
20442 compare_code = reverse_condition_maybe_unordered (compare_code);
20443 code = reverse_condition_maybe_unordered (code);
20445 else
20447 compare_code = reverse_condition (compare_code);
20448 code = reverse_condition (code);
20452 compare_code = UNKNOWN;
20453 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
20454 && CONST_INT_P (op1))
20456 if (op1 == const0_rtx
20457 && (code == LT || code == GE))
20458 compare_code = code;
20459 else if (op1 == constm1_rtx)
20461 if (code == LE)
20462 compare_code = LT;
20463 else if (code == GT)
20464 compare_code = GE;
20468 /* Optimize dest = (op0 < 0) ? -1 : cf. */
20469 if (compare_code != UNKNOWN
20470 && GET_MODE (op0) == GET_MODE (out)
20471 && (cf == -1 || ct == -1))
20473 /* If lea code below could be used, only optimize
20474 if it results in a 2 insn sequence. */
20476 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
20477 || diff == 3 || diff == 5 || diff == 9)
20478 || (compare_code == LT && ct == -1)
20479 || (compare_code == GE && cf == -1))
20482 * notl op1 (if necessary)
20483 * sarl $31, op1
20484 * orl cf, op1
20486 if (ct != -1)
20488 cf = ct;
20489 ct = -1;
20490 code = reverse_condition (code);
20493 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
20495 out = expand_simple_binop (mode, IOR,
20496 out, GEN_INT (cf),
20497 out, 1, OPTAB_DIRECT);
20498 if (out != operands[0])
20499 emit_move_insn (operands[0], out);
20501 return true;
20506 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
20507 || diff == 3 || diff == 5 || diff == 9)
20508 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
20509 && (mode != DImode
20510 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
20513 * xorl dest,dest
20514 * cmpl op1,op2
20515 * setcc dest
20516 * lea cf(dest*(ct-cf)),dest
20518 * Size 14.
20520 * This also catches the degenerate setcc-only case.
20523 rtx tmp;
20524 int nops;
20526 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
20528 nops = 0;
20529 /* On x86_64 the lea instruction operates on Pmode, so we need
20530 to get the arithmetic done in the proper mode to match. */
20531 if (diff == 1)
20532 tmp = copy_rtx (out);
20533 else
20535 rtx out1;
20536 out1 = copy_rtx (out);
20537 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
20538 nops++;
20539 if (diff & 1)
20541 tmp = gen_rtx_PLUS (mode, tmp, out1);
20542 nops++;
20545 if (cf != 0)
20547 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
20548 nops++;
20550 if (!rtx_equal_p (tmp, out))
20552 if (nops == 1)
20553 out = force_operand (tmp, copy_rtx (out));
20554 else
20555 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
20557 if (!rtx_equal_p (out, operands[0]))
20558 emit_move_insn (operands[0], copy_rtx (out));
20560 return true;
20564 * General case: Jumpful:
20565 * xorl dest,dest cmpl op1, op2
20566 * cmpl op1, op2 movl ct, dest
20567 * setcc dest jcc 1f
20568 * decl dest movl cf, dest
20569 * andl (cf-ct),dest 1:
20570 * addl ct,dest
20572 * Size 20. Size 14.
20574 * This is reasonably steep, but branch mispredict costs are
20575 * high on modern cpus, so consider failing only if optimizing
20576 * for space.
20579 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
20580 && BRANCH_COST (optimize_insn_for_speed_p (),
20581 false) >= 2)
20583 if (cf == 0)
20585 enum machine_mode cmp_mode = GET_MODE (op0);
20587 cf = ct;
20588 ct = 0;
20590 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20592 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20594 /* We may be reversing an unordered compare to a normal compare,
20595 which is not valid in general (we may convert a non-trapping
20596 condition into a trapping one); however, on i386 we currently
20597 emit all comparisons unordered. */
20598 code = reverse_condition_maybe_unordered (code);
20600 else
20602 code = reverse_condition (code);
20603 if (compare_code != UNKNOWN)
20604 compare_code = reverse_condition (compare_code);
20608 if (compare_code != UNKNOWN)
20610 /* notl op1 (if needed)
20611 sarl $31, op1
20612 andl (cf-ct), op1
20613 addl ct, op1
20615 For x < 0 (resp. x <= -1) there will be no notl,
20616 so if possible swap the constants to get rid of the
20617 complement.
20618 True/false will be -1/0 while code below (store flag
20619 followed by decrement) is 0/-1, so the constants need
20620 to be exchanged once more. */
20622 if (compare_code == GE || !cf)
20624 code = reverse_condition (code);
20625 compare_code = LT;
20627 else
20629 HOST_WIDE_INT tmp = cf;
20630 cf = ct;
20631 ct = tmp;
20634 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
20636 else
20638 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
20640 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
20641 constm1_rtx,
20642 copy_rtx (out), 1, OPTAB_DIRECT);
20645 out = expand_simple_binop (mode, AND, copy_rtx (out),
20646 gen_int_mode (cf - ct, mode),
20647 copy_rtx (out), 1, OPTAB_DIRECT);
20648 if (ct)
20649 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
20650 copy_rtx (out), 1, OPTAB_DIRECT);
20651 if (!rtx_equal_p (out, operands[0]))
20652 emit_move_insn (operands[0], copy_rtx (out));
20654 return true;
20658 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
20660 /* Try a few things more with specific constants and a variable. */
20662 optab op;
20663 rtx var, orig_out, out, tmp;
20665 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
20666 return false;
20668 /* If one of the two operands is an interesting constant, load a
20669 constant with the above and mask it in with a logical operation. */
20671 if (CONST_INT_P (operands[2]))
20673 var = operands[3];
20674 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
20675 operands[3] = constm1_rtx, op = and_optab;
20676 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
20677 operands[3] = const0_rtx, op = ior_optab;
20678 else
20679 return false;
20681 else if (CONST_INT_P (operands[3]))
20683 var = operands[2];
20684 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
20685 operands[2] = constm1_rtx, op = and_optab;
20686 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
20687 operands[2] = const0_rtx, op = ior_optab;
20688 else
20689 return false;
20691 else
20692 return false;
20694 orig_out = operands[0];
20695 tmp = gen_reg_rtx (mode);
20696 operands[0] = tmp;
20698 /* Recurse to get the constant loaded. */
20699 if (ix86_expand_int_movcc (operands) == 0)
20700 return false;
20702 /* Mask in the interesting variable. */
20703 out = expand_binop (mode, op, var, tmp, orig_out, 0,
20704 OPTAB_WIDEN);
20705 if (!rtx_equal_p (out, orig_out))
20706 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
20708 return true;
20712 * For comparison with above,
20714 * movl cf,dest
20715 * movl ct,tmp
20716 * cmpl op1,op2
20717 * cmovcc tmp,dest
20719 * Size 15.
20722 if (! nonimmediate_operand (operands[2], mode))
20723 operands[2] = force_reg (mode, operands[2]);
20724 if (! nonimmediate_operand (operands[3], mode))
20725 operands[3] = force_reg (mode, operands[3]);
20727 if (! register_operand (operands[2], VOIDmode)
20728 && (mode == QImode
20729 || ! register_operand (operands[3], VOIDmode)))
20730 operands[2] = force_reg (mode, operands[2]);
20732 if (mode == QImode
20733 && ! register_operand (operands[3], VOIDmode))
20734 operands[3] = force_reg (mode, operands[3]);
20736 emit_insn (compare_seq);
20737 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
20738 gen_rtx_IF_THEN_ELSE (mode,
20739 compare_op, operands[2],
20740 operands[3])));
20741 return true;
20744 /* Swap, force into registers, or otherwise massage the two operands
20745 to an sse comparison with a mask result. Thus we differ a bit from
20746 ix86_prepare_fp_compare_args which expects to produce a flags result.
20748 The DEST operand exists to help determine whether to commute commutative
20749 operators. The POP0/POP1 operands are updated in place. The new
20750 comparison code is returned, or UNKNOWN if not implementable. */
20752 static enum rtx_code
20753 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
20754 rtx *pop0, rtx *pop1)
20756 rtx tmp;
20758 switch (code)
20760 case LTGT:
20761 case UNEQ:
20762 /* AVX supports all the needed comparisons. */
20763 if (TARGET_AVX)
20764 break;
20765 /* We have no LTGT as an operator. We could implement it with
20766 NE & ORDERED, but this requires an extra temporary. It's
20767 not clear that it's worth it. */
20768 return UNKNOWN;
20770 case LT:
20771 case LE:
20772 case UNGT:
20773 case UNGE:
20774 /* These are supported directly. */
20775 break;
20777 case EQ:
20778 case NE:
20779 case UNORDERED:
20780 case ORDERED:
20781 /* AVX has 3 operand comparisons, no need to swap anything. */
20782 if (TARGET_AVX)
20783 break;
20784 /* For commutative operators, try to canonicalize the destination
20785 operand to be first in the comparison - this helps reload to
20786 avoid extra moves. */
20787 if (!dest || !rtx_equal_p (dest, *pop1))
20788 break;
20789 /* FALLTHRU */
20791 case GE:
20792 case GT:
20793 case UNLE:
20794 case UNLT:
20795 /* These are not supported directly before AVX, and furthermore
20796 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
20797 comparison operands to transform into something that is
20798 supported. */
20799 tmp = *pop0;
20800 *pop0 = *pop1;
20801 *pop1 = tmp;
20802 code = swap_condition (code);
20803 break;
20805 default:
20806 gcc_unreachable ();
20809 return code;
20812 /* Detect conditional moves that exactly match min/max operational
20813 semantics. Note that this is IEEE safe, as long as we don't
20814 interchange the operands.
20816 Returns FALSE if this conditional move doesn't match a MIN/MAX,
20817 and TRUE if the operation is successful and instructions are emitted. */
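/* For illustration: (a < b) ? a : b maps to SMIN (a, b) and
(a < b) ? b : a maps to SMAX (a, b), but only when -ffinite-math-only
and -funsafe-math-optimizations are both in effect; otherwise the
UNSPEC_IEEE_MIN/UNSPEC_IEEE_MAX patterns are used, which keep the
operand order and hence the NaN and signed-zero behaviour of the
underlying min/max instruction. */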
20819 static bool
20820 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
20821 rtx cmp_op1, rtx if_true, rtx if_false)
20823 enum machine_mode mode;
20824 bool is_min;
20825 rtx tmp;
20827 if (code == LT)
20829 else if (code == UNGE)
20831 tmp = if_true;
20832 if_true = if_false;
20833 if_false = tmp;
20835 else
20836 return false;
20838 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
20839 is_min = true;
20840 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
20841 is_min = false;
20842 else
20843 return false;
20845 mode = GET_MODE (dest);
20847 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
20848 but MODE may be a vector mode and thus not appropriate. */
20849 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
20851 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
20852 rtvec v;
20854 if_true = force_reg (mode, if_true);
20855 v = gen_rtvec (2, if_true, if_false);
20856 tmp = gen_rtx_UNSPEC (mode, v, u);
20858 else
20860 code = is_min ? SMIN : SMAX;
20861 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
20864 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
20865 return true;
20868 /* Expand an sse vector comparison. Return the register with the result. */
20870 static rtx
20871 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
20872 rtx op_true, rtx op_false)
20874 enum machine_mode mode = GET_MODE (dest);
20875 enum machine_mode cmp_ops_mode = GET_MODE (cmp_op0);
20877 /* In the general case the result of the comparison can differ from the operands' type. */
20878 enum machine_mode cmp_mode;
20880 /* In AVX512F the result of comparison is an integer mask. */
20881 bool maskcmp = false;
20882 rtx x;
20884 if (GET_MODE_SIZE (cmp_ops_mode) == 64)
20886 cmp_mode = mode_for_size (GET_MODE_NUNITS (cmp_ops_mode), MODE_INT, 0);
20887 gcc_assert (cmp_mode != BLKmode);
20889 maskcmp = true;
20891 else
20892 cmp_mode = cmp_ops_mode;
20895 cmp_op0 = force_reg (cmp_ops_mode, cmp_op0);
20896 if (!nonimmediate_operand (cmp_op1, cmp_ops_mode))
20897 cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
20899 if (optimize
20900 || reg_overlap_mentioned_p (dest, op_true)
20901 || reg_overlap_mentioned_p (dest, op_false))
20902 dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
20904 /* Compare patterns for int modes are unspec in AVX512F only. */
20905 if (maskcmp && (code == GT || code == EQ))
20907 rtx (*gen)(rtx, rtx, rtx);
20909 switch (cmp_ops_mode)
20911 case V16SImode:
20912 gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
20913 break;
20914 case V8DImode:
20915 gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
20916 break;
20917 default:
20918 gen = NULL;
20921 if (gen)
20923 emit_insn (gen (dest, cmp_op0, cmp_op1));
20924 return dest;
20927 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
20929 if (cmp_mode != mode && !maskcmp)
20931 x = force_reg (cmp_ops_mode, x);
20932 convert_move (dest, x, false);
20934 else
20935 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20937 return dest;
20940 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
20941 operations. This is used for both scalar and vector conditional moves. */
20943 static void
20944 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
20946 enum machine_mode mode = GET_MODE (dest);
20947 enum machine_mode cmpmode = GET_MODE (cmp);
20949 /* In AVX512F the result of comparison is an integer mask. */
20950 bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
20952 rtx t2, t3, x;
20954 if (vector_all_ones_operand (op_true, mode)
20955 && rtx_equal_p (op_false, CONST0_RTX (mode))
20956 && !maskcmp)
20958 emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
20960 else if (op_false == CONST0_RTX (mode)
20961 && !maskcmp)
20963 op_true = force_reg (mode, op_true);
20964 x = gen_rtx_AND (mode, cmp, op_true);
20965 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20967 else if (op_true == CONST0_RTX (mode)
20968 && !maskcmp)
20970 op_false = force_reg (mode, op_false);
20971 x = gen_rtx_NOT (mode, cmp);
20972 x = gen_rtx_AND (mode, x, op_false);
20973 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20975 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)
20976 && !maskcmp)
20978 op_false = force_reg (mode, op_false);
20979 x = gen_rtx_IOR (mode, cmp, op_false);
20980 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20982 else if (TARGET_XOP
20983 && !maskcmp)
20985 op_true = force_reg (mode, op_true);
20987 if (!nonimmediate_operand (op_false, mode))
20988 op_false = force_reg (mode, op_false);
20990 emit_insn (gen_rtx_SET (mode, dest,
20991 gen_rtx_IF_THEN_ELSE (mode, cmp,
20992 op_true,
20993 op_false)));
20995 else
20997 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
20998 rtx d = dest;
21000 if (!nonimmediate_operand (op_true, mode))
21001 op_true = force_reg (mode, op_true);
21003 op_false = force_reg (mode, op_false);
21005 switch (mode)
21007 case V4SFmode:
21008 if (TARGET_SSE4_1)
21009 gen = gen_sse4_1_blendvps;
21010 break;
21011 case V2DFmode:
21012 if (TARGET_SSE4_1)
21013 gen = gen_sse4_1_blendvpd;
21014 break;
21015 case V16QImode:
21016 case V8HImode:
21017 case V4SImode:
21018 case V2DImode:
21019 if (TARGET_SSE4_1)
21021 gen = gen_sse4_1_pblendvb;
21022 if (mode != V16QImode)
21023 d = gen_reg_rtx (V16QImode);
21024 op_false = gen_lowpart (V16QImode, op_false);
21025 op_true = gen_lowpart (V16QImode, op_true);
21026 cmp = gen_lowpart (V16QImode, cmp);
21028 break;
21029 case V8SFmode:
21030 if (TARGET_AVX)
21031 gen = gen_avx_blendvps256;
21032 break;
21033 case V4DFmode:
21034 if (TARGET_AVX)
21035 gen = gen_avx_blendvpd256;
21036 break;
21037 case V32QImode:
21038 case V16HImode:
21039 case V8SImode:
21040 case V4DImode:
21041 if (TARGET_AVX2)
21043 gen = gen_avx2_pblendvb;
21044 if (mode != V32QImode)
21045 d = gen_reg_rtx (V32QImode);
21046 op_false = gen_lowpart (V32QImode, op_false);
21047 op_true = gen_lowpart (V32QImode, op_true);
21048 cmp = gen_lowpart (V32QImode, cmp);
21050 break;
21052 case V16SImode:
21053 gen = gen_avx512f_blendmv16si;
21054 break;
21055 case V8DImode:
21056 gen = gen_avx512f_blendmv8di;
21057 break;
21058 case V8DFmode:
21059 gen = gen_avx512f_blendmv8df;
21060 break;
21061 case V16SFmode:
21062 gen = gen_avx512f_blendmv16sf;
21063 break;
21065 default:
21066 break;
21069 if (gen != NULL)
21071 emit_insn (gen (d, op_false, op_true, cmp));
21072 if (d != dest)
21073 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
21075 else
21077 op_true = force_reg (mode, op_true);
21079 t2 = gen_reg_rtx (mode);
21080 if (optimize)
21081 t3 = gen_reg_rtx (mode);
21082 else
21083 t3 = dest;
21085 x = gen_rtx_AND (mode, op_true, cmp);
21086 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
21088 x = gen_rtx_NOT (mode, cmp);
21089 x = gen_rtx_AND (mode, x, op_false);
21090 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
21092 x = gen_rtx_IOR (mode, t3, t2);
21093 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21098 /* Expand a floating-point conditional move. Return true if successful. */
21100 bool
21101 ix86_expand_fp_movcc (rtx operands[])
21103 enum machine_mode mode = GET_MODE (operands[0]);
21104 enum rtx_code code = GET_CODE (operands[1]);
21105 rtx tmp, compare_op;
21106 rtx op0 = XEXP (operands[1], 0);
21107 rtx op1 = XEXP (operands[1], 1);
21109 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
21111 enum machine_mode cmode;
21113 /* Since we've no cmove for sse registers, don't force bad register
21114 allocation just to gain access to it. Deny movcc when the
21115 comparison mode doesn't match the move mode. */
21116 cmode = GET_MODE (op0);
21117 if (cmode == VOIDmode)
21118 cmode = GET_MODE (op1);
21119 if (cmode != mode)
21120 return false;
21122 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
21123 if (code == UNKNOWN)
21124 return false;
21126 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
21127 operands[2], operands[3]))
21128 return true;
21130 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
21131 operands[2], operands[3]);
21132 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
21133 return true;
21136 if (GET_MODE (op0) == TImode
21137 || (GET_MODE (op0) == DImode
21138 && !TARGET_64BIT))
21139 return false;
21141 /* The floating point conditional move instructions don't directly
21142 support conditions resulting from a signed integer comparison. */
21144 compare_op = ix86_expand_compare (code, op0, op1);
21145 if (!fcmov_comparison_operator (compare_op, VOIDmode))
21147 tmp = gen_reg_rtx (QImode);
21148 ix86_expand_setcc (tmp, code, op0, op1);
21150 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
21153 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
21154 gen_rtx_IF_THEN_ELSE (mode, compare_op,
21155 operands[2], operands[3])));
21157 return true;
21160 /* Expand a floating-point vector conditional move; a vcond operation
21161 rather than a movcc operation. */
21163 bool
21164 ix86_expand_fp_vcond (rtx operands[])
21166 enum rtx_code code = GET_CODE (operands[3]);
21167 rtx cmp;
21169 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
21170 &operands[4], &operands[5]);
21171 if (code == UNKNOWN)
21173 rtx temp;
21174 switch (GET_CODE (operands[3]))
21176 case LTGT:
21177 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
21178 operands[5], operands[0], operands[0]);
21179 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
21180 operands[5], operands[1], operands[2]);
21181 code = AND;
21182 break;
21183 case UNEQ:
21184 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
21185 operands[5], operands[0], operands[0]);
21186 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
21187 operands[5], operands[1], operands[2]);
21188 code = IOR;
21189 break;
21190 default:
21191 gcc_unreachable ();
21193 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
21194 OPTAB_DIRECT);
21195 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21196 return true;
21199 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
21200 operands[5], operands[1], operands[2]))
21201 return true;
21203 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
21204 operands[1], operands[2]);
21205 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21206 return true;
21209 /* Expand a signed/unsigned integral vector conditional move. */
21211 bool
21212 ix86_expand_int_vcond (rtx operands[])
21214 enum machine_mode data_mode = GET_MODE (operands[0]);
21215 enum machine_mode mode = GET_MODE (operands[4]);
21216 enum rtx_code code = GET_CODE (operands[3]);
21217 bool negate = false;
21218 rtx x, cop0, cop1;
21220 cop0 = operands[4];
21221 cop1 = operands[5];
21223 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
21224 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
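/* Worked example with 32-bit elements: for x = 0x80000005 (negative)
the arithmetic shift x >> 31 replicates the sign bit and yields
0xffffffff (-1) while the logical shift yields 1; for a non-negative x
both shifts yield 0. The shift count used below is
GET_MODE_BITSIZE (inner mode) - 1, i.e. 31 or 63. */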
21225 if ((code == LT || code == GE)
21226 && data_mode == mode
21227 && cop1 == CONST0_RTX (mode)
21228 && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
21229 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
21230 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
21231 && (GET_MODE_SIZE (data_mode) == 16
21232 || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
21234 rtx negop = operands[2 - (code == LT)];
21235 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
21236 if (negop == CONST1_RTX (data_mode))
21238 rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
21239 operands[0], 1, OPTAB_DIRECT);
21240 if (res != operands[0])
21241 emit_move_insn (operands[0], res);
21242 return true;
21244 else if (GET_MODE_INNER (data_mode) != DImode
21245 && vector_all_ones_operand (negop, data_mode))
21247 rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
21248 operands[0], 0, OPTAB_DIRECT);
21249 if (res != operands[0])
21250 emit_move_insn (operands[0], res);
21251 return true;
21255 if (!nonimmediate_operand (cop1, mode))
21256 cop1 = force_reg (mode, cop1);
21257 if (!general_operand (operands[1], data_mode))
21258 operands[1] = force_reg (data_mode, operands[1]);
21259 if (!general_operand (operands[2], data_mode))
21260 operands[2] = force_reg (data_mode, operands[2]);
21262 /* XOP supports all of the comparisons on all 128-bit vector int types. */
21263 if (TARGET_XOP
21264 && (mode == V16QImode || mode == V8HImode
21265 || mode == V4SImode || mode == V2DImode))
21267 else
21269 /* Canonicalize the comparison to EQ, GT, GTU. */
21270 switch (code)
21272 case EQ:
21273 case GT:
21274 case GTU:
21275 break;
21277 case NE:
21278 case LE:
21279 case LEU:
21280 code = reverse_condition (code);
21281 negate = true;
21282 break;
21284 case GE:
21285 case GEU:
21286 code = reverse_condition (code);
21287 negate = true;
21288 /* FALLTHRU */
21290 case LT:
21291 case LTU:
21292 code = swap_condition (code);
21293 x = cop0, cop0 = cop1, cop1 = x;
21294 break;
21296 default:
21297 gcc_unreachable ();
21300 /* Only SSE4.1/SSE4.2 supports V2DImode. */
21301 if (mode == V2DImode)
21303 switch (code)
21305 case EQ:
21306 /* SSE4.1 supports EQ. */
21307 if (!TARGET_SSE4_1)
21308 return false;
21309 break;
21311 case GT:
21312 case GTU:
21313 /* SSE4.2 supports GT/GTU. */
21314 if (!TARGET_SSE4_2)
21315 return false;
21316 break;
21318 default:
21319 gcc_unreachable ();
21323 /* Unsigned parallel compare is not supported by the hardware.
21324 Play some tricks to turn this into a signed comparison
21325 against 0. */
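/* For illustration with 32-bit elements: a GTU compare of a and b is
turned into a signed GT compare of (a - 0x80000000) and
(b - 0x80000000); e.g. a = 0xffffffff, b = 0 gives
0x7fffffff > 0x80000000 when read as signed values, i.e. true, which
matches the unsigned result. For the byte/word modes below a
saturating subtract followed by an EQ-against-zero test (with the
result negated) is used instead. */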
21326 if (code == GTU)
21328 cop0 = force_reg (mode, cop0);
21330 switch (mode)
21332 case V16SImode:
21333 case V8DImode:
21334 case V8SImode:
21335 case V4DImode:
21336 case V4SImode:
21337 case V2DImode:
21339 rtx t1, t2, mask;
21340 rtx (*gen_sub3) (rtx, rtx, rtx);
21342 switch (mode)
21344 case V16SImode: gen_sub3 = gen_subv16si3; break;
21345 case V8DImode: gen_sub3 = gen_subv8di3; break;
21346 case V8SImode: gen_sub3 = gen_subv8si3; break;
21347 case V4DImode: gen_sub3 = gen_subv4di3; break;
21348 case V4SImode: gen_sub3 = gen_subv4si3; break;
21349 case V2DImode: gen_sub3 = gen_subv2di3; break;
21350 default:
21351 gcc_unreachable ();
21353 /* Subtract (-(INT MAX) - 1) from both operands to make
21354 them signed. */
21355 mask = ix86_build_signbit_mask (mode, true, false);
21356 t1 = gen_reg_rtx (mode);
21357 emit_insn (gen_sub3 (t1, cop0, mask));
21359 t2 = gen_reg_rtx (mode);
21360 emit_insn (gen_sub3 (t2, cop1, mask));
21362 cop0 = t1;
21363 cop1 = t2;
21364 code = GT;
21366 break;
21368 case V32QImode:
21369 case V16HImode:
21370 case V16QImode:
21371 case V8HImode:
21372 /* Perform a parallel unsigned saturating subtraction. */
21373 x = gen_reg_rtx (mode);
21374 emit_insn (gen_rtx_SET (VOIDmode, x,
21375 gen_rtx_US_MINUS (mode, cop0, cop1)));
21377 cop0 = x;
21378 cop1 = CONST0_RTX (mode);
21379 code = EQ;
21380 negate = !negate;
21381 break;
21383 default:
21384 gcc_unreachable ();
21389 /* Allow the comparison to be done in one mode, but the movcc to
21390 happen in another mode. */
21391 if (data_mode == mode)
21393 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
21394 operands[1+negate], operands[2-negate]);
21396 else
21398 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
21399 x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
21400 operands[1+negate], operands[2-negate]);
21401 if (GET_MODE (x) == mode)
21402 x = gen_lowpart (data_mode, x);
21405 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
21406 operands[2-negate]);
21407 return true;
21410 static bool
21411 ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1)
21413 enum machine_mode mode = GET_MODE (op0);
21414 switch (mode)
21416 case V16SImode:
21417 emit_insn (gen_avx512f_vpermi2varv16si3 (target, op0,
21418 force_reg (V16SImode, mask),
21419 op1));
21420 return true;
21421 case V16SFmode:
21422 emit_insn (gen_avx512f_vpermi2varv16sf3 (target, op0,
21423 force_reg (V16SImode, mask),
21424 op1));
21425 return true;
21426 case V8DImode:
21427 emit_insn (gen_avx512f_vpermi2varv8di3 (target, op0,
21428 force_reg (V8DImode, mask), op1));
21429 return true;
21430 case V8DFmode:
21431 emit_insn (gen_avx512f_vpermi2varv8df3 (target, op0,
21432 force_reg (V8DImode, mask), op1));
21433 return true;
21434 default:
21435 return false;
21439 /* Expand a variable vector permutation. */
21441 void
21442 ix86_expand_vec_perm (rtx operands[])
21444 rtx target = operands[0];
21445 rtx op0 = operands[1];
21446 rtx op1 = operands[2];
21447 rtx mask = operands[3];
21448 rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
21449 enum machine_mode mode = GET_MODE (op0);
21450 enum machine_mode maskmode = GET_MODE (mask);
21451 int w, e, i;
21452 bool one_operand_shuffle = rtx_equal_p (op0, op1);
21454 /* Number of elements in the vector. */
21455 w = GET_MODE_NUNITS (mode);
21456 e = GET_MODE_UNIT_SIZE (mode);
21457 gcc_assert (w <= 64);
21459 if (ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1))
21460 return;
21462 if (TARGET_AVX2)
21464 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
21466 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
21467 a constant shuffle operand. With a tiny bit of effort we can
21468 use VPERMD instead. A re-interpretation stall for V4DFmode is
21469 unfortunate but there's no avoiding it.
21470 Similarly for V16HImode we don't have instructions for variable
21471 shuffling, while for V32QImode we can, after preparing suitable
21472 masks, use vpshufb; vpshufb; vpermq; vpor. */
21474 if (mode == V16HImode)
21476 maskmode = mode = V32QImode;
21477 w = 32;
21478 e = 1;
21480 else
21482 maskmode = mode = V8SImode;
21483 w = 8;
21484 e = 4;
21486 t1 = gen_reg_rtx (maskmode);
21488 /* Replicate the low bits of the V4DImode mask into V8SImode:
21489 mask = { A B C D }
21490 t1 = { A A B B C C D D }. */
21491 for (i = 0; i < w / 2; ++i)
21492 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
21493 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21494 vt = force_reg (maskmode, vt);
21495 mask = gen_lowpart (maskmode, mask);
21496 if (maskmode == V8SImode)
21497 emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
21498 else
21499 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
21501 /* Multiply the shuffle indices by two. */
21502 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
21503 OPTAB_DIRECT);
21505 /* Add one to the odd shuffle indices:
21506 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
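/* Putting the three steps together for a sample V4DI mask { 1 3 0 2 }:
replication gives { 1 1 3 3 0 0 2 2 }, doubling gives
{ 2 2 6 6 0 0 4 4 } and adding the 0/1 pattern gives
{ 2 3 6 7 0 1 4 5 }, the V8SI control that selects the same 64-bit
elements as the original V4DI mask. */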
21507 for (i = 0; i < w / 2; ++i)
21509 vec[i * 2] = const0_rtx;
21510 vec[i * 2 + 1] = const1_rtx;
21512 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21513 vt = validize_mem (force_const_mem (maskmode, vt));
21514 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
21515 OPTAB_DIRECT);
21517 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
21518 operands[3] = mask = t1;
21519 target = gen_reg_rtx (mode);
21520 op0 = gen_lowpart (mode, op0);
21521 op1 = gen_lowpart (mode, op1);
21524 switch (mode)
21526 case V8SImode:
21527 /* The VPERMD and VPERMPS instructions already properly ignore
21528 the high bits of the shuffle elements. No need for us to
21529 perform an AND ourselves. */
21530 if (one_operand_shuffle)
21532 emit_insn (gen_avx2_permvarv8si (target, op0, mask));
21533 if (target != operands[0])
21534 emit_move_insn (operands[0],
21535 gen_lowpart (GET_MODE (operands[0]), target));
21537 else
21539 t1 = gen_reg_rtx (V8SImode);
21540 t2 = gen_reg_rtx (V8SImode);
21541 emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
21542 emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
21543 goto merge_two;
21545 return;
21547 case V8SFmode:
21548 mask = gen_lowpart (V8SImode, mask);
21549 if (one_operand_shuffle)
21550 emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
21551 else
21553 t1 = gen_reg_rtx (V8SFmode);
21554 t2 = gen_reg_rtx (V8SFmode);
21555 emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
21556 emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
21557 goto merge_two;
21559 return;
21561 case V4SImode:
21562 /* By combining the two 128-bit input vectors into one 256-bit
21563 input vector, we can use VPERMD and VPERMPS for the full
21564 two-operand shuffle. */
21565 t1 = gen_reg_rtx (V8SImode);
21566 t2 = gen_reg_rtx (V8SImode);
21567 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
21568 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
21569 emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
21570 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
21571 return;
21573 case V4SFmode:
21574 t1 = gen_reg_rtx (V8SFmode);
21575 t2 = gen_reg_rtx (V8SImode);
21576 mask = gen_lowpart (V4SImode, mask);
21577 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
21578 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
21579 emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
21580 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
21581 return;
21583 case V32QImode:
21584 t1 = gen_reg_rtx (V32QImode);
21585 t2 = gen_reg_rtx (V32QImode);
21586 t3 = gen_reg_rtx (V32QImode);
21587 vt2 = GEN_INT (-128);
21588 for (i = 0; i < 32; i++)
21589 vec[i] = vt2;
21590 vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
21591 vt = force_reg (V32QImode, vt);
21592 for (i = 0; i < 32; i++)
21593 vec[i] = i < 16 ? vt2 : const0_rtx;
21594 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
21595 vt2 = force_reg (V32QImode, vt2);
21596 /* From mask create two adjusted masks, which contain the same
21597 bits as mask in the low 7 bits of each vector element.
21598 The first mask will have the most significant bit clear
21599 if it requests element from the same 128-bit lane
21600 and MSB set if it requests element from the other 128-bit lane.
21601 The second mask will have the opposite values of the MSB,
21602 and additionally will have its 128-bit lanes swapped.
21603 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
21604 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
21605 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
21606 stands for other 12 bytes. */
21607 /* The bit whether element is from the same lane or the other
21608 lane is bit 4, so shift it up by 3 to the MSB position. */
21609 t5 = gen_reg_rtx (V4DImode);
21610 emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
21611 GEN_INT (3)));
21612 /* Clear MSB bits from the mask just in case it had them set. */
21613 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
21614 /* After this t1 will have MSB set for elements from other lane. */
21615 emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
21616 /* Clear bits other than MSB. */
21617 emit_insn (gen_andv32qi3 (t1, t1, vt));
21618 /* Or in the lower bits from mask into t3. */
21619 emit_insn (gen_iorv32qi3 (t3, t1, t2));
21620 /* And invert MSB bits in t1, so MSB is set for elements from the same
21621 lane. */
21622 emit_insn (gen_xorv32qi3 (t1, t1, vt));
21623 /* Swap 128-bit lanes in t3. */
21624 t6 = gen_reg_rtx (V4DImode);
21625 emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
21626 const2_rtx, GEN_INT (3),
21627 const0_rtx, const1_rtx));
21628 /* And or in the lower bits from mask into t1. */
21629 emit_insn (gen_iorv32qi3 (t1, t1, t2));
21630 if (one_operand_shuffle)
21632 /* Each of these shuffles will put 0s in places where
21633 element from the other 128-bit lane is needed, otherwise
21634 will shuffle in the requested value. */
21635 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
21636 gen_lowpart (V32QImode, t6)));
21637 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
21638 /* For t3 the 128-bit lanes are swapped again. */
21639 t7 = gen_reg_rtx (V4DImode);
21640 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
21641 const2_rtx, GEN_INT (3),
21642 const0_rtx, const1_rtx));
21643 /* And oring both together leads to the result. */
21644 emit_insn (gen_iorv32qi3 (target, t1,
21645 gen_lowpart (V32QImode, t7)));
21646 if (target != operands[0])
21647 emit_move_insn (operands[0],
21648 gen_lowpart (GET_MODE (operands[0]), target));
21649 return;
21652 t4 = gen_reg_rtx (V32QImode);
21653 /* Similar to the one_operand_shuffle code above, just repeated
21654 twice, once for each operand. The merge_two: code below will
21655 merge the two results together. */
21656 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
21657 gen_lowpart (V32QImode, t6)));
21658 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
21659 gen_lowpart (V32QImode, t6)));
21660 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
21661 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
21662 t7 = gen_reg_rtx (V4DImode);
21663 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
21664 const2_rtx, GEN_INT (3),
21665 const0_rtx, const1_rtx));
21666 t8 = gen_reg_rtx (V4DImode);
21667 emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
21668 const2_rtx, GEN_INT (3),
21669 const0_rtx, const1_rtx));
21670 emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
21671 emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
21672 t1 = t4;
21673 t2 = t3;
21674 goto merge_two;
21676 default:
21677 gcc_assert (GET_MODE_SIZE (mode) <= 16);
21678 break;
21682 if (TARGET_XOP)
21684 /* The XOP VPPERM insn supports three inputs. By ignoring the
21685 one_operand_shuffle special case, we avoid creating another
21686 set of constant vectors in memory. */
21687 one_operand_shuffle = false;
21689 /* mask = mask & {2*w-1, ...} */
21690 vt = GEN_INT (2*w - 1);
21692 else
21694 /* mask = mask & {w-1, ...} */
21695 vt = GEN_INT (w - 1);
21698 for (i = 0; i < w; i++)
21699 vec[i] = vt;
21700 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21701 mask = expand_simple_binop (maskmode, AND, mask, vt,
21702 NULL_RTX, 0, OPTAB_DIRECT);
21704 /* For non-QImode operations, convert the word permutation control
21705 into a byte permutation control. */
21706 if (mode != V16QImode)
21708 mask = expand_simple_binop (maskmode, ASHIFT, mask,
21709 GEN_INT (exact_log2 (e)),
21710 NULL_RTX, 0, OPTAB_DIRECT);
21712 /* Convert mask to vector of chars. */
21713 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
21715 /* Replicate each of the input bytes into byte positions:
21716 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
21717 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
21718 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
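/* For illustration with V4SImode (e == 4): a word index of 3 becomes
12 after the shift by log2 (e) above, the pshufb (or XOP pperm) with
the replication constant copies that 12 into all four byte positions
of its element, and adding { 0,1,2,3, 0,1,2,3, ... } below yields the
byte indices 12,13,14,15 that the byte shuffle needs. */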
21719 for (i = 0; i < 16; ++i)
21720 vec[i] = GEN_INT (i/e * e);
21721 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
21722 vt = validize_mem (force_const_mem (V16QImode, vt));
21723 if (TARGET_XOP)
21724 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
21725 else
21726 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
21728 /* Convert it into the byte positions by doing
21729 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
21730 for (i = 0; i < 16; ++i)
21731 vec[i] = GEN_INT (i % e);
21732 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
21733 vt = validize_mem (force_const_mem (V16QImode, vt));
21734 emit_insn (gen_addv16qi3 (mask, mask, vt));
21737 /* The actual shuffle operations all operate on V16QImode. */
21738 op0 = gen_lowpart (V16QImode, op0);
21739 op1 = gen_lowpart (V16QImode, op1);
21741 if (TARGET_XOP)
21743 if (GET_MODE (target) != V16QImode)
21744 target = gen_reg_rtx (V16QImode);
21745 emit_insn (gen_xop_pperm (target, op0, op1, mask));
21746 if (target != operands[0])
21747 emit_move_insn (operands[0],
21748 gen_lowpart (GET_MODE (operands[0]), target));
21750 else if (one_operand_shuffle)
21752 if (GET_MODE (target) != V16QImode)
21753 target = gen_reg_rtx (V16QImode);
21754 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
21755 if (target != operands[0])
21756 emit_move_insn (operands[0],
21757 gen_lowpart (GET_MODE (operands[0]), target));
21759 else
21761 rtx xops[6];
21762 bool ok;
21764 /* Shuffle the two input vectors independently. */
21765 t1 = gen_reg_rtx (V16QImode);
21766 t2 = gen_reg_rtx (V16QImode);
21767 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
21768 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
21770 merge_two:
21771 /* Then merge them together. The key is whether any given control
21772 element contained a bit set that indicates the second word. */
21773 mask = operands[3];
21774 vt = GEN_INT (w);
21775 if (maskmode == V2DImode && !TARGET_SSE4_1)
21777 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
21778 more shuffle to convert the V2DI input mask into a V4SI
21779 input mask. At which point the masking that expand_int_vcond
21780 will work as desired. */
21781 rtx t3 = gen_reg_rtx (V4SImode);
21782 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
21783 const0_rtx, const0_rtx,
21784 const2_rtx, const2_rtx));
21785 mask = t3;
21786 maskmode = V4SImode;
21787 e = w = 4;
21790 for (i = 0; i < w; i++)
21791 vec[i] = vt;
21792 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21793 vt = force_reg (maskmode, vt);
21794 mask = expand_simple_binop (maskmode, AND, mask, vt,
21795 NULL_RTX, 0, OPTAB_DIRECT);
21797 if (GET_MODE (target) != mode)
21798 target = gen_reg_rtx (mode);
21799 xops[0] = target;
21800 xops[1] = gen_lowpart (mode, t2);
21801 xops[2] = gen_lowpart (mode, t1);
21802 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
21803 xops[4] = mask;
21804 xops[5] = vt;
21805 ok = ix86_expand_int_vcond (xops);
21806 gcc_assert (ok);
21807 if (target != operands[0])
21808 emit_move_insn (operands[0],
21809 gen_lowpart (GET_MODE (operands[0]), target));
21813 /* Unpack SRC into DEST, the next wider integer vector type. UNSIGNED_P is
21814 true if we should do zero extension, else sign extension. HIGH_P is
21815 true if we want the N/2 high elements, else the low elements. */
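/* For illustration: with SSE4.1, sign extending the low half of a
V8HImode vector is a single pmovsxwd; for the high half the source is
first shifted right by 8 bytes (the V1TImode shift below) so the same
instruction can be used. Without SSE4.1 the elements are instead
interleaved with either zero (zero extension) or with a sign mask
obtained from a GT comparison of 0 against SRC. */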
21817 void
21818 ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
21820 enum machine_mode imode = GET_MODE (src);
21821 rtx tmp;
21823 if (TARGET_SSE4_1)
21825 rtx (*unpack)(rtx, rtx);
21826 rtx (*extract)(rtx, rtx) = NULL;
21827 enum machine_mode halfmode = BLKmode;
21829 switch (imode)
21831 case V32QImode:
21832 if (unsigned_p)
21833 unpack = gen_avx2_zero_extendv16qiv16hi2;
21834 else
21835 unpack = gen_avx2_sign_extendv16qiv16hi2;
21836 halfmode = V16QImode;
21837 extract
21838 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
21839 break;
21840 case V32HImode:
21841 if (unsigned_p)
21842 unpack = gen_avx512f_zero_extendv16hiv16si2;
21843 else
21844 unpack = gen_avx512f_sign_extendv16hiv16si2;
21845 halfmode = V16HImode;
21846 extract
21847 = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
21848 break;
21849 case V16HImode:
21850 if (unsigned_p)
21851 unpack = gen_avx2_zero_extendv8hiv8si2;
21852 else
21853 unpack = gen_avx2_sign_extendv8hiv8si2;
21854 halfmode = V8HImode;
21855 extract
21856 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
21857 break;
21858 case V16SImode:
21859 if (unsigned_p)
21860 unpack = gen_avx512f_zero_extendv8siv8di2;
21861 else
21862 unpack = gen_avx512f_sign_extendv8siv8di2;
21863 halfmode = V8SImode;
21864 extract
21865 = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
21866 break;
21867 case V8SImode:
21868 if (unsigned_p)
21869 unpack = gen_avx2_zero_extendv4siv4di2;
21870 else
21871 unpack = gen_avx2_sign_extendv4siv4di2;
21872 halfmode = V4SImode;
21873 extract
21874 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
21875 break;
21876 case V16QImode:
21877 if (unsigned_p)
21878 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
21879 else
21880 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
21881 break;
21882 case V8HImode:
21883 if (unsigned_p)
21884 unpack = gen_sse4_1_zero_extendv4hiv4si2;
21885 else
21886 unpack = gen_sse4_1_sign_extendv4hiv4si2;
21887 break;
21888 case V4SImode:
21889 if (unsigned_p)
21890 unpack = gen_sse4_1_zero_extendv2siv2di2;
21891 else
21892 unpack = gen_sse4_1_sign_extendv2siv2di2;
21893 break;
21894 default:
21895 gcc_unreachable ();
21898 if (GET_MODE_SIZE (imode) >= 32)
21900 tmp = gen_reg_rtx (halfmode);
21901 emit_insn (extract (tmp, src));
21903 else if (high_p)
21905 /* Shift higher 8 bytes to lower 8 bytes. */
21906 tmp = gen_reg_rtx (V1TImode);
21907 emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
21908 GEN_INT (64)));
21909 tmp = gen_lowpart (imode, tmp);
21911 else
21912 tmp = src;
21914 emit_insn (unpack (dest, tmp));
21916 else
21918 rtx (*unpack)(rtx, rtx, rtx);
21920 switch (imode)
21922 case V16QImode:
21923 if (high_p)
21924 unpack = gen_vec_interleave_highv16qi;
21925 else
21926 unpack = gen_vec_interleave_lowv16qi;
21927 break;
21928 case V8HImode:
21929 if (high_p)
21930 unpack = gen_vec_interleave_highv8hi;
21931 else
21932 unpack = gen_vec_interleave_lowv8hi;
21933 break;
21934 case V4SImode:
21935 if (high_p)
21936 unpack = gen_vec_interleave_highv4si;
21937 else
21938 unpack = gen_vec_interleave_lowv4si;
21939 break;
21940 default:
21941 gcc_unreachable ();
21944 if (unsigned_p)
21945 tmp = force_reg (imode, CONST0_RTX (imode));
21946 else
21947 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
21948 src, pc_rtx, pc_rtx);
21950 rtx tmp2 = gen_reg_rtx (imode);
21951 emit_insn (unpack (tmp2, src, tmp));
21952 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
21956 /* Expand conditional increment or decrement using adc/sbb instructions.
21957 The default case using setcc followed by the conditional move can be
21958 done by generic code. */
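/* For illustration: with unsigned operands,
x = (a < b) ? y + 1 : y
can be emitted as a compare of a and b, which leaves a < b in the
carry flag, followed by adc $0, y, i.e. y plus the carry; the
decrement case uses sbb instead. Only an adjustment of exactly 1
(operands[3] equal to const1_rtx or constm1_rtx) and conditions that
can be expressed through the carry flag are handled here. */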
21959 bool
21960 ix86_expand_int_addcc (rtx operands[])
21962 enum rtx_code code = GET_CODE (operands[1]);
21963 rtx flags;
21964 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
21965 rtx compare_op;
21966 rtx val = const0_rtx;
21967 bool fpcmp = false;
21968 enum machine_mode mode;
21969 rtx op0 = XEXP (operands[1], 0);
21970 rtx op1 = XEXP (operands[1], 1);
21972 if (operands[3] != const1_rtx
21973 && operands[3] != constm1_rtx)
21974 return false;
21975 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
21976 return false;
21977 code = GET_CODE (compare_op);
21979 flags = XEXP (compare_op, 0);
21981 if (GET_MODE (flags) == CCFPmode
21982 || GET_MODE (flags) == CCFPUmode)
21984 fpcmp = true;
21985 code = ix86_fp_compare_code_to_integer (code);
21988 if (code != LTU)
21990 val = constm1_rtx;
21991 if (fpcmp)
21992 PUT_CODE (compare_op,
21993 reverse_condition_maybe_unordered
21994 (GET_CODE (compare_op)));
21995 else
21996 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
21999 mode = GET_MODE (operands[0]);
22001 /* Construct either adc or sbb insn. */
22002 if ((code == LTU) == (operands[3] == constm1_rtx))
22004 switch (mode)
22006 case QImode:
22007 insn = gen_subqi3_carry;
22008 break;
22009 case HImode:
22010 insn = gen_subhi3_carry;
22011 break;
22012 case SImode:
22013 insn = gen_subsi3_carry;
22014 break;
22015 case DImode:
22016 insn = gen_subdi3_carry;
22017 break;
22018 default:
22019 gcc_unreachable ();
22022 else
22024 switch (mode)
22026 case QImode:
22027 insn = gen_addqi3_carry;
22028 break;
22029 case HImode:
22030 insn = gen_addhi3_carry;
22031 break;
22032 case SImode:
22033 insn = gen_addsi3_carry;
22034 break;
22035 case DImode:
22036 insn = gen_adddi3_carry;
22037 break;
22038 default:
22039 gcc_unreachable ();
22042 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
22044 return true;
22048 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
22049 but works for floating point parameters and non-offsettable memories.
22050 For pushes, it returns just stack offsets; the values will be saved
22051 in the right order. Maximally four parts are generated. */
22053 static int
22054 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
22056 int size;
22058 if (!TARGET_64BIT)
22059 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
22060 else
22061 size = (GET_MODE_SIZE (mode) + 4) / 8;
22063 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
22064 gcc_assert (size >= 2 && size <= 4);
22066 /* Optimize constant pool reference to immediates. This is used by fp
22067 moves that force all constants to memory to allow combining. */
22068 if (MEM_P (operand) && MEM_READONLY_P (operand))
22070 rtx tmp = maybe_get_pool_constant (operand);
22071 if (tmp)
22072 operand = tmp;
22075 if (MEM_P (operand) && !offsettable_memref_p (operand))
22077 /* The only non-offsettable memories we handle are pushes. */
22078 int ok = push_operand (operand, VOIDmode);
22080 gcc_assert (ok);
22082 operand = copy_rtx (operand);
22083 PUT_MODE (operand, word_mode);
22084 parts[0] = parts[1] = parts[2] = parts[3] = operand;
22085 return size;
22088 if (GET_CODE (operand) == CONST_VECTOR)
22090 enum machine_mode imode = int_mode_for_mode (mode);
22091 /* Caution: if we looked through a constant pool memory above,
22092 the operand may actually have a different mode now. That's
22093 ok, since we want to pun this all the way back to an integer. */
22094 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
22095 gcc_assert (operand != NULL);
22096 mode = imode;
22099 if (!TARGET_64BIT)
22101 if (mode == DImode)
22102 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22103 else
22105 int i;
22107 if (REG_P (operand))
22109 gcc_assert (reload_completed);
22110 for (i = 0; i < size; i++)
22111 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
22113 else if (offsettable_memref_p (operand))
22115 operand = adjust_address (operand, SImode, 0);
22116 parts[0] = operand;
22117 for (i = 1; i < size; i++)
22118 parts[i] = adjust_address (operand, SImode, 4 * i);
22120 else if (GET_CODE (operand) == CONST_DOUBLE)
22122 REAL_VALUE_TYPE r;
22123 long l[4];
22125 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22126 switch (mode)
22128 case TFmode:
22129 real_to_target (l, &r, mode);
22130 parts[3] = gen_int_mode (l[3], SImode);
22131 parts[2] = gen_int_mode (l[2], SImode);
22132 break;
22133 case XFmode:
22134 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
22135 long double may not be 80-bit. */
22136 real_to_target (l, &r, mode);
22137 parts[2] = gen_int_mode (l[2], SImode);
22138 break;
22139 case DFmode:
22140 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
22141 break;
22142 default:
22143 gcc_unreachable ();
22145 parts[1] = gen_int_mode (l[1], SImode);
22146 parts[0] = gen_int_mode (l[0], SImode);
22148 else
22149 gcc_unreachable ();
22152 else
22154 if (mode == TImode)
22155 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22156 if (mode == XFmode || mode == TFmode)
22158 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
22159 if (REG_P (operand))
22161 gcc_assert (reload_completed);
22162 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
22163 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
22165 else if (offsettable_memref_p (operand))
22167 operand = adjust_address (operand, DImode, 0);
22168 parts[0] = operand;
22169 parts[1] = adjust_address (operand, upper_mode, 8);
22171 else if (GET_CODE (operand) == CONST_DOUBLE)
22173 REAL_VALUE_TYPE r;
22174 long l[4];
22176 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22177 real_to_target (l, &r, mode);
22179 /* Do not use shift by 32 to avoid warning on 32bit systems. */
22180 if (HOST_BITS_PER_WIDE_INT >= 64)
22181 parts[0]
22182 = gen_int_mode
22183 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
22184 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
22185 DImode);
22186 else
22187 parts[0] = immed_double_const (l[0], l[1], DImode);
22189 if (upper_mode == SImode)
22190 parts[1] = gen_int_mode (l[2], SImode);
22191 else if (HOST_BITS_PER_WIDE_INT >= 64)
22192 parts[1]
22193 = gen_int_mode
22194 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
22195 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
22196 DImode);
22197 else
22198 parts[1] = immed_double_const (l[2], l[3], DImode);
22200 else
22201 gcc_unreachable ();
22205 return size;
22208 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
22209 Return false when normal moves are needed; true when all required
22210 insns have been emitted. Operands 2-4 contain the input values
22211 in the correct order; operands 5-7 contain the output values. */
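/* For illustration: on a 32-bit target an XFmode store is split into
three SImode parts; the push path below additionally adjusts the stack
pointer by 4 first when long double is padded to 16 bytes
(TARGET_128BIT_LONG_DOUBLE), so that the three 4-byte pushes land at
the right offsets. */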
22213 void
22214 ix86_split_long_move (rtx operands[])
22216 rtx part[2][4];
22217 int nparts, i, j;
22218 int push = 0;
22219 int collisions = 0;
22220 enum machine_mode mode = GET_MODE (operands[0]);
22221 bool collisionparts[4];
22223 /* The DFmode expanders may ask us to move a double.
22224 For a 64bit target this is a single move. By hiding the fact
22225 here we simplify the i386.md splitters. */
22226 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
22228 /* Optimize constant pool reference to immediates. This is used by
22229 fp moves that force all constants to memory to allow combining. */
22231 if (MEM_P (operands[1])
22232 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
22233 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
22234 operands[1] = get_pool_constant (XEXP (operands[1], 0));
22235 if (push_operand (operands[0], VOIDmode))
22237 operands[0] = copy_rtx (operands[0]);
22238 PUT_MODE (operands[0], word_mode);
22240 else
22241 operands[0] = gen_lowpart (DImode, operands[0]);
22242 operands[1] = gen_lowpart (DImode, operands[1]);
22243 emit_move_insn (operands[0], operands[1]);
22244 return;
22247 /* The only non-offsettable memory we handle is push. */
22248 if (push_operand (operands[0], VOIDmode))
22249 push = 1;
22250 else
22251 gcc_assert (!MEM_P (operands[0])
22252 || offsettable_memref_p (operands[0]));
22254 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
22255 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
22257 /* When emitting push, take care for source operands on the stack. */
22258 if (push && MEM_P (operands[1])
22259 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
22261 rtx src_base = XEXP (part[1][nparts - 1], 0);
22263 /* Compensate for the stack decrement by 4. */
22264 if (!TARGET_64BIT && nparts == 3
22265 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
22266 src_base = plus_constant (Pmode, src_base, 4);
22268 /* src_base refers to the stack pointer and is
22269 automatically decreased by emitted push. */
22270 for (i = 0; i < nparts; i++)
22271 part[1][i] = change_address (part[1][i],
22272 GET_MODE (part[1][i]), src_base);
22275 /* We need to do copy in the right order in case an address register
22276 of the source overlaps the destination. */
22277 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
22279 rtx tmp;
22281 for (i = 0; i < nparts; i++)
22283 collisionparts[i]
22284 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
22285 if (collisionparts[i])
22286 collisions++;
22289 /* Collision in the middle part can be handled by reordering. */
22290 if (collisions == 1 && nparts == 3 && collisionparts [1])
22292 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
22293 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
22295 else if (collisions == 1
22296 && nparts == 4
22297 && (collisionparts [1] || collisionparts [2]))
22299 if (collisionparts [1])
22301 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
22302 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
22304 else
22306 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
22307 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
22311 /* If there are more collisions, we can't handle it by reordering.
22312 Do an lea to the last part and use only one colliding move. */
22313 else if (collisions > 1)
22315 rtx base;
22317 collisions = 1;
22319 base = part[0][nparts - 1];
22321 /* Handle the case when the last part isn't valid for lea.
22322 Happens in 64-bit mode storing the 12-byte XFmode. */
22323 if (GET_MODE (base) != Pmode)
22324 base = gen_rtx_REG (Pmode, REGNO (base));
22326 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
22327 part[1][0] = replace_equiv_address (part[1][0], base);
22328 for (i = 1; i < nparts; i++)
22330 tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
22331 part[1][i] = replace_equiv_address (part[1][i], tmp);
22336 if (push)
22338 if (!TARGET_64BIT)
22340 if (nparts == 3)
22342 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
22343 emit_insn (ix86_gen_add3 (stack_pointer_rtx,
22344 stack_pointer_rtx, GEN_INT (-4)));
22345 emit_move_insn (part[0][2], part[1][2]);
22347 else if (nparts == 4)
22349 emit_move_insn (part[0][3], part[1][3]);
22350 emit_move_insn (part[0][2], part[1][2]);
22353 else
22355 /* In 64bit mode we don't have 32bit push available. In case this is a
22356 register, it is OK - we will just use the larger counterpart. We also
22357 retype memory - these come from an attempt to avoid a REX prefix
22358 when moving the second half of a TFmode value. */
22359 if (GET_MODE (part[1][1]) == SImode)
22361 switch (GET_CODE (part[1][1]))
22363 case MEM:
22364 part[1][1] = adjust_address (part[1][1], DImode, 0);
22365 break;
22367 case REG:
22368 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
22369 break;
22371 default:
22372 gcc_unreachable ();
22375 if (GET_MODE (part[1][0]) == SImode)
22376 part[1][0] = part[1][1];
22379 emit_move_insn (part[0][1], part[1][1]);
22380 emit_move_insn (part[0][0], part[1][0]);
22381 return;
22384 /* Choose correct order to not overwrite the source before it is copied. */
22385 if ((REG_P (part[0][0])
22386 && REG_P (part[1][1])
22387 && (REGNO (part[0][0]) == REGNO (part[1][1])
22388 || (nparts == 3
22389 && REGNO (part[0][0]) == REGNO (part[1][2]))
22390 || (nparts == 4
22391 && REGNO (part[0][0]) == REGNO (part[1][3]))))
22392 || (collisions > 0
22393 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
22395 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
22397 operands[2 + i] = part[0][j];
22398 operands[6 + i] = part[1][j];
22401 else
22403 for (i = 0; i < nparts; i++)
22405 operands[2 + i] = part[0][i];
22406 operands[6 + i] = part[1][i];
22410 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
22411 if (optimize_insn_for_size_p ())
22413 for (j = 0; j < nparts - 1; j++)
22414 if (CONST_INT_P (operands[6 + j])
22415 && operands[6 + j] != const0_rtx
22416 && REG_P (operands[2 + j]))
22417 for (i = j; i < nparts - 1; i++)
22418 if (CONST_INT_P (operands[7 + i])
22419 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
22420 operands[7 + i] = operands[2 + j];
22423 for (i = 0; i < nparts; i++)
22424 emit_move_insn (operands[2 + i], operands[6 + i]);
22426 return;
22429 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
22430 left shift by a constant, either using a single shift or
22431 a sequence of add instructions. */
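/* E.g. a left shift by 2 becomes two self-additions (operand added to
itself twice) when two adds are no more expensive than one constant
shift and we are not optimizing for size; otherwise a single shift
insn is emitted. */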
22433 static void
22434 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
22436 rtx (*insn)(rtx, rtx, rtx);
22438 if (count == 1
22439 || (count * ix86_cost->add <= ix86_cost->shift_const
22440 && !optimize_insn_for_size_p ()))
22442 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
22443 while (count-- > 0)
22444 emit_insn (insn (operand, operand, operand));
22446 else
22448 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
22449 emit_insn (insn (operand, operand, GEN_INT (count)));
22453 void
22454 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
22456 rtx (*gen_ashl3)(rtx, rtx, rtx);
22457 rtx (*gen_shld)(rtx, rtx, rtx);
22458 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22460 rtx low[2], high[2];
22461 int count;
22463 if (CONST_INT_P (operands[2]))
22465 split_double_mode (mode, operands, 2, low, high);
22466 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22468 if (count >= half_width)
22470 emit_move_insn (high[0], low[1]);
22471 emit_move_insn (low[0], const0_rtx);
22473 if (count > half_width)
22474 ix86_expand_ashl_const (high[0], count - half_width, mode);
22476 else
22478 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
22480 if (!rtx_equal_p (operands[0], operands[1]))
22481 emit_move_insn (operands[0], operands[1]);
22483 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
22484 ix86_expand_ashl_const (low[0], count, mode);
22486 return;
22489 split_double_mode (mode, operands, 1, low, high);
22491 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
22493 if (operands[1] == const1_rtx)
22495 /* Assuming we've chosen QImode-capable registers, then 1 << N
22496 can be done with two 32/64-bit shifts, no branches, no cmoves. */
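/* For illustration, DImode on a 32-bit target: both halves are
cleared, bit 5 of the shift count then selects which half receives the
1 (sete puts it in the low half when the count is below 32, setne in
the high half otherwise), and the final two 32-bit shifts by the
original count, which the hardware masks to 0..31, move that 1 into
place. E.g. a count of 40 sets high = 1 and shifts it left by
40 & 31 = 8. */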
22497 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
22499 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
22501 ix86_expand_clear (low[0]);
22502 ix86_expand_clear (high[0]);
22503 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
22505 d = gen_lowpart (QImode, low[0]);
22506 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
22507 s = gen_rtx_EQ (QImode, flags, const0_rtx);
22508 emit_insn (gen_rtx_SET (VOIDmode, d, s));
22510 d = gen_lowpart (QImode, high[0]);
22511 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
22512 s = gen_rtx_NE (QImode, flags, const0_rtx);
22513 emit_insn (gen_rtx_SET (VOIDmode, d, s));
22516 /* Otherwise, we can get the same results by manually performing
22517 a bit extract operation on bit 5/6, and then performing the two
22518 shifts. The two methods of getting 0/1 into low/high are exactly
22519 the same size. Avoiding the shift in the bit extract case helps
22520 pentium4 a bit; no one else seems to care much either way. */
22521 else
22523 enum machine_mode half_mode;
22524 rtx (*gen_lshr3)(rtx, rtx, rtx);
22525 rtx (*gen_and3)(rtx, rtx, rtx);
22526 rtx (*gen_xor3)(rtx, rtx, rtx);
22527 HOST_WIDE_INT bits;
22528 rtx x;
22530 if (mode == DImode)
22532 half_mode = SImode;
22533 gen_lshr3 = gen_lshrsi3;
22534 gen_and3 = gen_andsi3;
22535 gen_xor3 = gen_xorsi3;
22536 bits = 5;
22538 else
22540 half_mode = DImode;
22541 gen_lshr3 = gen_lshrdi3;
22542 gen_and3 = gen_anddi3;
22543 gen_xor3 = gen_xordi3;
22544 bits = 6;
22547 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
22548 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
22549 else
22550 x = gen_lowpart (half_mode, operands[2]);
22551 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
22553 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
22554 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
22555 emit_move_insn (low[0], high[0]);
22556 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
22559 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
22560 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
22561 return;
22564 if (operands[1] == constm1_rtx)
22566 /* For -1 << N, we can avoid the shld instruction, because we
22567 know that we're shifting 0...31/63 ones into a -1. */
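/* E.g. for DImode -1 << 8 the result is 0xffffffffffffff00: the high
word stays 0xffffffff for any count 0..31 because the ones shifted in
from the low word replace the ones shifted out, so only the low word
needs the real shift (emitted below) and the shld step can be
skipped. */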
22568 emit_move_insn (low[0], constm1_rtx);
22569 if (optimize_insn_for_size_p ())
22570 emit_move_insn (high[0], low[0]);
22571 else
22572 emit_move_insn (high[0], constm1_rtx);
22574 else
22576 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
22578 if (!rtx_equal_p (operands[0], operands[1]))
22579 emit_move_insn (operands[0], operands[1]);
22581 split_double_mode (mode, operands, 1, low, high);
22582 emit_insn (gen_shld (high[0], low[0], operands[2]));
22585 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
22587 if (TARGET_CMOVE && scratch)
22589 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22590 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22592 ix86_expand_clear (scratch);
22593 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
22595 else
22597 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
22598 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
22600 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
22604 void
22605 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
22607 rtx (*gen_ashr3)(rtx, rtx, rtx)
22608 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
22609 rtx (*gen_shrd)(rtx, rtx, rtx);
22610 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22612 rtx low[2], high[2];
22613 int count;
22615 if (CONST_INT_P (operands[2]))
22617 split_double_mode (mode, operands, 2, low, high);
22618 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22620 if (count == GET_MODE_BITSIZE (mode) - 1)
22622 emit_move_insn (high[0], high[1]);
22623 emit_insn (gen_ashr3 (high[0], high[0],
22624 GEN_INT (half_width - 1)));
22625 emit_move_insn (low[0], high[0]);
22628 else if (count >= half_width)
22630 emit_move_insn (low[0], high[1]);
22631 emit_move_insn (high[0], low[0]);
22632 emit_insn (gen_ashr3 (high[0], high[0],
22633 GEN_INT (half_width - 1)));
22635 if (count > half_width)
22636 emit_insn (gen_ashr3 (low[0], low[0],
22637 GEN_INT (count - half_width)));
22639 else
22641 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22643 if (!rtx_equal_p (operands[0], operands[1]))
22644 emit_move_insn (operands[0], operands[1]);
22646 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
22647 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
22650 else
22652 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22654 if (!rtx_equal_p (operands[0], operands[1]))
22655 emit_move_insn (operands[0], operands[1]);
22657 split_double_mode (mode, operands, 1, low, high);
22659 emit_insn (gen_shrd (low[0], high[0], operands[2]));
22660 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
22662 if (TARGET_CMOVE && scratch)
22664 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22665 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22667 emit_move_insn (scratch, high[0]);
22668 emit_insn (gen_ashr3 (scratch, scratch,
22669 GEN_INT (half_width - 1)));
22670 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
22671 scratch));
22673 else
22675 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
22676 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
22678 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
22683 void
22684 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
22686 rtx (*gen_lshr3)(rtx, rtx, rtx)
22687 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
22688 rtx (*gen_shrd)(rtx, rtx, rtx);
22689 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22691 rtx low[2], high[2];
22692 int count;
22694 if (CONST_INT_P (operands[2]))
22696 split_double_mode (mode, operands, 2, low, high);
22697 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22699 if (count >= half_width)
22701 emit_move_insn (low[0], high[1]);
22702 ix86_expand_clear (high[0]);
22704 if (count > half_width)
22705 emit_insn (gen_lshr3 (low[0], low[0],
22706 GEN_INT (count - half_width)));
22708 else
22710 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22712 if (!rtx_equal_p (operands[0], operands[1]))
22713 emit_move_insn (operands[0], operands[1]);
22715 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
22716 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
22719 else
22721 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22723 if (!rtx_equal_p (operands[0], operands[1]))
22724 emit_move_insn (operands[0], operands[1]);
22726 split_double_mode (mode, operands, 1, low, high);
22728 emit_insn (gen_shrd (low[0], high[0], operands[2]));
22729 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
22731 if (TARGET_CMOVE && scratch)
22733 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22734 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22736 ix86_expand_clear (scratch);
22737 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
22738 scratch));
22740 else
22742 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
22743 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
22745 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
22750 /* Predict just emitted jump instruction to be taken with probability PROB. */
22751 static void
22752 predict_jump (int prob)
22754 rtx insn = get_last_insn ();
22755 gcc_assert (JUMP_P (insn));
22756 add_int_reg_note (insn, REG_BR_PROB, prob);
22759 /* Helper function for the string operations below. Test whether VARIABLE
22760 is aligned to VALUE bytes. If true, jump to the label.
22761 static rtx_code_label *
22762 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
22764 rtx_code_label *label = gen_label_rtx ();
22765 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
22766 if (GET_MODE (variable) == DImode)
22767 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
22768 else
22769 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
22770 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
22771 1, label);
22772 if (epilogue)
22773 predict_jump (REG_BR_PROB_BASE * 50 / 100);
22774 else
22775 predict_jump (REG_BR_PROB_BASE * 90 / 100);
22776 return label;
22779 /* Adjust COUNTER by the VALUE. */
22780 static void
22781 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
22783 rtx (*gen_add)(rtx, rtx, rtx)
22784 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
22786 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
22789 /* Zero extend possibly SImode EXP to Pmode register. */
22791 ix86_zero_extend_to_Pmode (rtx exp)
22793 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
22796 /* Divide COUNTREG by SCALE. */
22797 static rtx
22798 scale_counter (rtx countreg, int scale)
22800 rtx sc;
22802 if (scale == 1)
22803 return countreg;
22804 if (CONST_INT_P (countreg))
22805 return GEN_INT (INTVAL (countreg) / scale);
22806 gcc_assert (REG_P (countreg));
22808 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
22809 GEN_INT (exact_log2 (scale)),
22810 NULL, 1, OPTAB_DIRECT);
22811 return sc;
22814 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
22815 DImode for constant loop counts. */
22817 static enum machine_mode
22818 counter_mode (rtx count_exp)
22820 if (GET_MODE (count_exp) != VOIDmode)
22821 return GET_MODE (count_exp);
22822 if (!CONST_INT_P (count_exp))
22823 return Pmode;
22824 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
22825 return DImode;
22826 return SImode;
22829 /* Copy the address to a Pmode register. This is used for x32 to
22830 truncate DImode TLS address to a SImode register. */
22832 static rtx
22833 ix86_copy_addr_to_reg (rtx addr)
22835 if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode)
22836 return copy_addr_to_reg (addr);
22837 else
22839 gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
22840 return gen_rtx_SUBREG (SImode, copy_to_mode_reg (DImode, addr), 0);
22844 /* When ISSETMEM is FALSE, output a simple loop to move the memory pointed to
22845 by SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times; the overall
22846 size is COUNT specified in bytes. When ISSETMEM is TRUE, output the
22847 equivalent loop to set memory with VALUE (supposed to be in MODE).
22849 The size is rounded down to a whole number of chunks moved at once.
22850 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
22853 static void
22854 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
22855 rtx destptr, rtx srcptr, rtx value,
22856 rtx count, enum machine_mode mode, int unroll,
22857 int expected_size, bool issetmem)
22859 rtx_code_label *out_label, *top_label;
22860 rtx iter, tmp;
22861 enum machine_mode iter_mode = counter_mode (count);
22862 int piece_size_n = GET_MODE_SIZE (mode) * unroll;
22863 rtx piece_size = GEN_INT (piece_size_n);
22864 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
22865 rtx size;
22866 int i;
22868 top_label = gen_label_rtx ();
22869 out_label = gen_label_rtx ();
22870 iter = gen_reg_rtx (iter_mode);
22872 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
22873 NULL, 1, OPTAB_DIRECT);
22874 /* Those two should combine. */
22875 if (piece_size == const1_rtx)
22877 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
22878 true, out_label);
22879 predict_jump (REG_BR_PROB_BASE * 10 / 100);
22881 emit_move_insn (iter, const0_rtx);
22883 emit_label (top_label);
22885 tmp = convert_modes (Pmode, iter_mode, iter, true);
22887 /* This assert could be relaxed - in this case we'd need to compute
22888 the smallest power of two contained in PIECE_SIZE_N and pass it to
22889 offset_address. */
22890 gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
22891 destmem = offset_address (destmem, tmp, piece_size_n);
22892 destmem = adjust_address (destmem, mode, 0);
22894 if (!issetmem)
22896 srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
22897 srcmem = adjust_address (srcmem, mode, 0);
22899 /* When unrolling for chips that reorder memory reads and writes,
22900 we can save registers by using a single temporary.
22901 Using 4 temporaries is also overkill in 32-bit mode. */
22902 if (!TARGET_64BIT && 0)
22904 for (i = 0; i < unroll; i++)
22906 if (i)
22908 destmem =
22909 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
22910 srcmem =
22911 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
22913 emit_move_insn (destmem, srcmem);
22916 else
22918 rtx tmpreg[4];
22919 gcc_assert (unroll <= 4);
22920 for (i = 0; i < unroll; i++)
22922 tmpreg[i] = gen_reg_rtx (mode);
22923 if (i)
22925 srcmem =
22926 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
22928 emit_move_insn (tmpreg[i], srcmem);
22930 for (i = 0; i < unroll; i++)
22932 if (i)
22934 destmem =
22935 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
22937 emit_move_insn (destmem, tmpreg[i]);
22941 else
22942 for (i = 0; i < unroll; i++)
22944 if (i)
22945 destmem =
22946 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
22947 emit_move_insn (destmem, value);
22950 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
22951 true, OPTAB_LIB_WIDEN);
22952 if (tmp != iter)
22953 emit_move_insn (iter, tmp);
22955 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
22956 true, top_label);
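  /* Predict the loop-back branch: with an expected iteration count of N the
     backedge is taken roughly (N-1)/N of the time; without an estimate,
     assume 80%.  */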
22957 if (expected_size != -1)
22959 expected_size /= GET_MODE_SIZE (mode) * unroll;
22960 if (expected_size == 0)
22961 predict_jump (0);
22962 else if (expected_size > REG_BR_PROB_BASE)
22963 predict_jump (REG_BR_PROB_BASE - 1);
22964 else
22965 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
22967 else
22968 predict_jump (REG_BR_PROB_BASE * 80 / 100);
22969 iter = ix86_zero_extend_to_Pmode (iter);
22970 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
22971 true, OPTAB_LIB_WIDEN);
22972 if (tmp != destptr)
22973 emit_move_insn (destptr, tmp);
22974 if (!issetmem)
22976 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
22977 true, OPTAB_LIB_WIDEN);
22978 if (tmp != srcptr)
22979 emit_move_insn (srcptr, tmp);
22981 emit_label (out_label);
22984 /* Output "rep; mov" or "rep; stos" instruction depending on ISSETMEM argument.
22985 When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
22986 When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
22987 For the setmem case, VALUE is ORIG_VALUE promoted to a wider size.
22988 ORIG_VALUE is the original value passed to memset to fill the memory with.
22989 Other arguments have same meaning as for previous function. */
22991 static void
22992 expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem,
22993 rtx destptr, rtx srcptr, rtx value, rtx orig_value,
22994 rtx count,
22995 enum machine_mode mode, bool issetmem)
22997 rtx destexp;
22998 rtx srcexp;
22999 rtx countreg;
23000 HOST_WIDE_INT rounded_count;
23002 /* If possible, it is shorter to use rep movs.
23003 TODO: Maybe it is better to move this logic to decide_alg. */
23004 if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3)
23005 && (!issetmem || orig_value == const0_rtx))
23006 mode = SImode;
23008 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
23009 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
23011 countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
23012 GET_MODE_SIZE (mode)));
23013 if (mode != QImode)
23015 destexp = gen_rtx_ASHIFT (Pmode, countreg,
23016 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23017 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
23019 else
23020 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
23021 if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count))
23023 rounded_count = (INTVAL (count)
23024 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23025 destmem = shallow_copy_rtx (destmem);
23026 set_mem_size (destmem, rounded_count);
23028 else if (MEM_SIZE_KNOWN_P (destmem))
23029 clear_mem_size (destmem);
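  /* Emit the rep stos (for setmem) or rep movs (for movmem) instruction
     itself.  */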
23031 if (issetmem)
23033 value = force_reg (mode, gen_lowpart (mode, value));
23034 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
23036 else
23038 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
23039 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
23040 if (mode != QImode)
23042 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
23043 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23044 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
23046 else
23047 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
23048 if (CONST_INT_P (count))
23050 rounded_count = (INTVAL (count)
23051 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23052 srcmem = shallow_copy_rtx (srcmem);
23053 set_mem_size (srcmem, rounded_count);
23055 else
23057 if (MEM_SIZE_KNOWN_P (srcmem))
23058 clear_mem_size (srcmem);
23060 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
23061 destexp, srcexp));
23065 /* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
23066 DESTMEM.
23067 SRCMEM is passed by pointer to be updated on return.
23068 The return value is the updated DESTMEM. */
23069 static rtx
23070 emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
23071 HOST_WIDE_INT size_to_move)
23073 rtx dst = destmem, src = *srcmem, adjust, tempreg;
23074 enum insn_code code;
23075 enum machine_mode move_mode;
23076 int piece_size, i;
23078 /* Find the widest mode in which we could perform moves.
23079 Start with the biggest power of 2 less than SIZE_TO_MOVE and halve
23080 it until a move of such size is supported. */
23081 piece_size = 1 << floor_log2 (size_to_move);
23082 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23083 code = optab_handler (mov_optab, move_mode);
23084 while (code == CODE_FOR_nothing && piece_size > 1)
23086 piece_size >>= 1;
23087 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23088 code = optab_handler (mov_optab, move_mode);
23091 /* Find the corresponding vector mode with the same size as MOVE_MODE.
23092 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
23093 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
23095 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
23096 move_mode = mode_for_vector (word_mode, nunits);
23097 code = optab_handler (mov_optab, move_mode);
23098 if (code == CODE_FOR_nothing)
23100 move_mode = word_mode;
23101 piece_size = GET_MODE_SIZE (move_mode);
23102 code = optab_handler (mov_optab, move_mode);
23105 gcc_assert (code != CODE_FOR_nothing);
23107 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23108 src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);
23110 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZE moves. */
23111 gcc_assert (size_to_move % piece_size == 0);
23112 adjust = GEN_INT (piece_size);
23113 for (i = 0; i < size_to_move; i += piece_size)
23115 /* We move from memory to memory, so we'll need to do it via
23116 a temporary register. */
23117 tempreg = gen_reg_rtx (move_mode);
23118 emit_insn (GEN_FCN (code) (tempreg, src));
23119 emit_insn (GEN_FCN (code) (dst, tempreg));
23121 emit_move_insn (destptr,
23122 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23123 emit_move_insn (srcptr,
23124 gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));
23126 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23127 piece_size);
23128 src = adjust_automodify_address_nv (src, move_mode, srcptr,
23129 piece_size);
23132 /* Update DST and SRC rtx. */
23133 *srcmem = src;
23134 return dst;
23137 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
23138 static void
23139 expand_movmem_epilogue (rtx destmem, rtx srcmem,
23140 rtx destptr, rtx srcptr, rtx count, int max_size)
23142 rtx src, dest;
23143 if (CONST_INT_P (count))
23145 HOST_WIDE_INT countval = INTVAL (count);
23146 HOST_WIDE_INT epilogue_size = countval % max_size;
23147 int i;
23149 /* For now MAX_SIZE should be a power of 2. This assert could be
23150 relaxed, but it would require a somewhat more complicated epilogue
23151 expansion. */
23152 gcc_assert ((max_size & (max_size - 1)) == 0);
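      /* Each set bit of EPILOGUE_SIZE corresponds to one residual move;
	 walking from the largest power of two down emits at most one move
	 per size.  */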
23153 for (i = max_size; i >= 1; i >>= 1)
23155 if (epilogue_size & i)
23156 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23158 return;
23160 if (max_size > 8)
23162 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
23163 count, 1, OPTAB_DIRECT);
23164 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
23165 count, QImode, 1, 4, false);
23166 return;
23169 /* When single stringops are available, we can cheaply advance the dest and
23170 src pointers. Otherwise we save code size by maintaining an offset register
23171 (zero is readily available from the preceding rep operation) and using x86 addressing modes.
23173 if (TARGET_SINGLE_STRINGOP)
23175 if (max_size > 4)
23177 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23178 src = change_address (srcmem, SImode, srcptr);
23179 dest = change_address (destmem, SImode, destptr);
23180 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23181 emit_label (label);
23182 LABEL_NUSES (label) = 1;
23184 if (max_size > 2)
23186 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23187 src = change_address (srcmem, HImode, srcptr);
23188 dest = change_address (destmem, HImode, destptr);
23189 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23190 emit_label (label);
23191 LABEL_NUSES (label) = 1;
23193 if (max_size > 1)
23195 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23196 src = change_address (srcmem, QImode, srcptr);
23197 dest = change_address (destmem, QImode, destptr);
23198 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23199 emit_label (label);
23200 LABEL_NUSES (label) = 1;
23203 else
23205 rtx offset = force_reg (Pmode, const0_rtx);
23206 rtx tmp;
23208 if (max_size > 4)
23210 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23211 src = change_address (srcmem, SImode, srcptr);
23212 dest = change_address (destmem, SImode, destptr);
23213 emit_move_insn (dest, src);
23214 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
23215 true, OPTAB_LIB_WIDEN);
23216 if (tmp != offset)
23217 emit_move_insn (offset, tmp);
23218 emit_label (label);
23219 LABEL_NUSES (label) = 1;
23221 if (max_size > 2)
23223 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23224 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23225 src = change_address (srcmem, HImode, tmp);
23226 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23227 dest = change_address (destmem, HImode, tmp);
23228 emit_move_insn (dest, src);
23229 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
23230 true, OPTAB_LIB_WIDEN);
23231 if (tmp != offset)
23232 emit_move_insn (offset, tmp);
23233 emit_label (label);
23234 LABEL_NUSES (label) = 1;
23236 if (max_size > 1)
23238 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23239 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23240 src = change_address (srcmem, QImode, tmp);
23241 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23242 dest = change_address (destmem, QImode, tmp);
23243 emit_move_insn (dest, src);
23244 emit_label (label);
23245 LABEL_NUSES (label) = 1;
23250 /* This function emits moves to fill SIZE_TO_MOVE bytes starting from DESTMEM
23251 with value PROMOTED_VAL.
23253 The return value is the updated DESTMEM. */
23254 static rtx
23255 emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
23256 HOST_WIDE_INT size_to_move)
23258 rtx dst = destmem, adjust;
23259 enum insn_code code;
23260 enum machine_mode move_mode;
23261 int piece_size, i;
23263 /* Find the widest mode in which we could perform moves.
23264 Start with the biggest power of 2 less than SIZE_TO_MOVE and half
23265 it until move of such size is supported. */
23266 move_mode = GET_MODE (promoted_val);
23267 if (move_mode == VOIDmode)
23268 move_mode = QImode;
23269 if (size_to_move < GET_MODE_SIZE (move_mode))
23271 move_mode = mode_for_size (size_to_move * BITS_PER_UNIT, MODE_INT, 0);
23272 promoted_val = gen_lowpart (move_mode, promoted_val);
23274 piece_size = GET_MODE_SIZE (move_mode);
23275 code = optab_handler (mov_optab, move_mode);
23276 gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX);
23278 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23280 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZE moves. */
23281 gcc_assert (size_to_move % piece_size == 0);
23282 adjust = GEN_INT (piece_size);
23283 for (i = 0; i < size_to_move; i += piece_size)
23285 if (piece_size <= GET_MODE_SIZE (word_mode))
23287 emit_insn (gen_strset (destptr, dst, promoted_val));
23288 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23289 piece_size);
23290 continue;
23293 emit_insn (GEN_FCN (code) (dst, promoted_val));
23295 emit_move_insn (destptr,
23296 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23298 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23299 piece_size);
23302 /* Update DST rtx. */
23303 return dst;
23305 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
23306 static void
23307 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
23308 rtx count, int max_size)
23310 count =
23311 expand_simple_binop (counter_mode (count), AND, count,
23312 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
23313 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
23314 gen_lowpart (QImode, value), count, QImode,
23315 1, max_size / 2, true);
23318 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
23319 static void
23320 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
23321 rtx count, int max_size)
23323 rtx dest;
23325 if (CONST_INT_P (count))
23327 HOST_WIDE_INT countval = INTVAL (count);
23328 HOST_WIDE_INT epilogue_size = countval % max_size;
23329 int i;
23331 /* For now MAX_SIZE should be a power of 2. This assert could be
23332 relaxed, but it would require a somewhat more complicated epilogue
23333 expansion. */
23334 gcc_assert ((max_size & (max_size - 1)) == 0);
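      /* As in the memcpy epilogue, emit one store per set bit of
	 EPILOGUE_SIZE, using the vector value for pieces wider than the
	 scalar promoted value.  */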
23335 for (i = max_size; i >= 1; i >>= 1)
23337 if (epilogue_size & i)
23339 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23340 destmem = emit_memset (destmem, destptr, vec_value, i);
23341 else
23342 destmem = emit_memset (destmem, destptr, value, i);
23345 return;
23347 if (max_size > 32)
23349 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
23350 return;
23352 if (max_size > 16)
23354 rtx_code_label *label = ix86_expand_aligntest (count, 16, true);
23355 if (TARGET_64BIT)
23357 dest = change_address (destmem, DImode, destptr);
23358 emit_insn (gen_strset (destptr, dest, value));
23359 dest = adjust_automodify_address_nv (dest, DImode, destptr, 8);
23360 emit_insn (gen_strset (destptr, dest, value));
23362 else
23364 dest = change_address (destmem, SImode, destptr);
23365 emit_insn (gen_strset (destptr, dest, value));
23366 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23367 emit_insn (gen_strset (destptr, dest, value));
23368 dest = adjust_automodify_address_nv (dest, SImode, destptr, 8);
23369 emit_insn (gen_strset (destptr, dest, value));
23370 dest = adjust_automodify_address_nv (dest, SImode, destptr, 12);
23371 emit_insn (gen_strset (destptr, dest, value));
23373 emit_label (label);
23374 LABEL_NUSES (label) = 1;
23376 if (max_size > 8)
23378 rtx_code_label *label = ix86_expand_aligntest (count, 8, true);
23379 if (TARGET_64BIT)
23381 dest = change_address (destmem, DImode, destptr);
23382 emit_insn (gen_strset (destptr, dest, value));
23384 else
23386 dest = change_address (destmem, SImode, destptr);
23387 emit_insn (gen_strset (destptr, dest, value));
23388 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23389 emit_insn (gen_strset (destptr, dest, value));
23391 emit_label (label);
23392 LABEL_NUSES (label) = 1;
23394 if (max_size > 4)
23396 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23397 dest = change_address (destmem, SImode, destptr);
23398 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
23399 emit_label (label);
23400 LABEL_NUSES (label) = 1;
23402 if (max_size > 2)
23404 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23405 dest = change_address (destmem, HImode, destptr);
23406 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
23407 emit_label (label);
23408 LABEL_NUSES (label) = 1;
23410 if (max_size > 1)
23412 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23413 dest = change_address (destmem, QImode, destptr);
23414 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
23415 emit_label (label);
23416 LABEL_NUSES (label) = 1;
23420 /* Depending on ISSETMEM, copy enough from SRCMEM to DESTMEM or set enough to
23421 DESTMEM to align it to DESIRED_ALIGNMENT. Original alignment is ALIGN.
23422 Depending on ISSETMEM, either arguments SRCMEM/SRCPTR or VALUE/VEC_VALUE are
23423 ignored.
23424 Return value is updated DESTMEM. */
23425 static rtx
23426 expand_set_or_movmem_prologue (rtx destmem, rtx srcmem,
23427 rtx destptr, rtx srcptr, rtx value,
23428 rtx vec_value, rtx count, int align,
23429 int desired_alignment, bool issetmem)
23431 int i;
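  /* For each power of two I from ALIGN up to DESIRED_ALIGNMENT, test the I
     bit of DESTPTR; when it is set, copy or set I bytes so that DESTPTR
     becomes aligned to 2*I.  */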
23432 for (i = 1; i < desired_alignment; i <<= 1)
23434 if (align <= i)
23436 rtx_code_label *label = ix86_expand_aligntest (destptr, i, false);
23437 if (issetmem)
23439 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23440 destmem = emit_memset (destmem, destptr, vec_value, i);
23441 else
23442 destmem = emit_memset (destmem, destptr, value, i);
23444 else
23445 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23446 ix86_adjust_counter (count, i);
23447 emit_label (label);
23448 LABEL_NUSES (label) = 1;
23449 set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
23452 return destmem;
23455 /* Test if COUNT&SIZE is nonzero and if so, expand a movmem
23456 or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
23457 and jump to DONE_LABEL. */
23458 static void
23459 expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
23460 rtx destptr, rtx srcptr,
23461 rtx value, rtx vec_value,
23462 rtx count, int size,
23463 rtx done_label, bool issetmem)
23465 rtx_code_label *label = ix86_expand_aligntest (count, size, false);
23466 enum machine_mode mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 1);
23467 rtx modesize;
23468 int n;
23470 /* If we do not have vector value to copy, we must reduce size. */
23471 if (issetmem)
23473 if (!vec_value)
23475 if (GET_MODE (value) == VOIDmode && size > 8)
23476 mode = Pmode;
23477 else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value)))
23478 mode = GET_MODE (value);
23480 else
23481 mode = GET_MODE (vec_value), value = vec_value;
23483 else
23485 /* Choose appropriate vector mode. */
23486 if (size >= 32)
23487 mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
23488 else if (size >= 16)
23489 mode = TARGET_SSE ? V16QImode : DImode;
23490 srcmem = change_address (srcmem, mode, srcptr);
23492 destmem = change_address (destmem, mode, destptr);
23493 modesize = GEN_INT (GET_MODE_SIZE (mode));
23494 gcc_assert (GET_MODE_SIZE (mode) <= size);
23495 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
23497 if (issetmem)
23498 emit_move_insn (destmem, gen_lowpart (mode, value));
23499 else
23501 emit_move_insn (destmem, srcmem);
23502 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23504 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
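  /* Now handle the tail: reposition DESTMEM (and SRCMEM) at COUNT - SIZE and
     copy or set the last SIZE bytes; this may overlap the head copy above
     when COUNT < 2 * SIZE.  */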
23507 destmem = offset_address (destmem, count, 1);
23508 destmem = offset_address (destmem, GEN_INT (-2 * size),
23509 GET_MODE_SIZE (mode));
23510 if (!issetmem)
23512 srcmem = offset_address (srcmem, count, 1);
23513 srcmem = offset_address (srcmem, GEN_INT (-2 * size),
23514 GET_MODE_SIZE (mode));
23516 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
23518 if (issetmem)
23519 emit_move_insn (destmem, gen_lowpart (mode, value));
23520 else
23522 emit_move_insn (destmem, srcmem);
23523 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23525 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23527 emit_jump_insn (gen_jump (done_label));
23528 emit_barrier ();
23530 emit_label (label);
23531 LABEL_NUSES (label) = 1;
23534 /* Handle small memcpy (up to SIZE, which is supposed to be a small power of 2)
23535 and get ready for the main memcpy loop by copying the initial DESIRED_ALIGN-ALIGN
23536 bytes and the last SIZE bytes, adjusting DESTPTR/SRCPTR/COUNT in a way we can
23537 proceed with a loop copying SIZE bytes at once. Do moves in MODE.
23538 DONE_LABEL is a label after the whole copying sequence. The label is created
23539 on demand if *DONE_LABEL is NULL.
23540 MIN_SIZE is the minimal size of the block copied. This value gets adjusted for new
23541 bounds after the initial copies.
23543 DESTMEM/SRCMEM are memory expressions pointing to the copied block,
23544 DESTPTR/SRCPTR are pointers to the block. DYNAMIC_CHECK indicates whether
23545 we will dispatch to a library call for large blocks.
23547 In pseudocode we do:
23549 if (COUNT < SIZE)
23551 Assume that SIZE is 4. Bigger sizes are handled analogously
23552 if (COUNT & 4)
23554 copy 4 bytes from SRCPTR to DESTPTR
23555 copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
23556 goto done_label
23558 if (!COUNT)
23559 goto done_label;
23560 copy 1 byte from SRCPTR to DESTPTR
23561 if (COUNT & 2)
23563 copy 2 bytes from SRCPTR to DESTPTR
23564 copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
23567 else
23569 copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
23570 copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT - SIZE
23572 OLD_DESTPTR = DESTPTR;
23573 Align DESTPTR up to DESIRED_ALIGN
23574 SRCPTR += DESTPTR - OLD_DESTPTR
23575 COUNT -= DESTPTR - OLD_DESTPTR
23576 if (DYNAMIC_CHECK)
23577 Round COUNT down to multiple of SIZE
23578 << optional caller supplied zero size guard is here >>
23579 << optional caller supplied dynamic check is here >>
23580 << caller supplied main copy loop is here >>
23582 done_label:
23584 static void
23585 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem,
23586 rtx *destptr, rtx *srcptr,
23587 enum machine_mode mode,
23588 rtx value, rtx vec_value,
23589 rtx *count,
23590 rtx_code_label **done_label,
23591 int size,
23592 int desired_align,
23593 int align,
23594 unsigned HOST_WIDE_INT *min_size,
23595 bool dynamic_check,
23596 bool issetmem)
23598 rtx_code_label *loop_label = NULL, *label;
23599 int n;
23600 rtx modesize;
23601 int prolog_size = 0;
23602 rtx mode_value;
23604 /* Choose the proper value to copy. */
23605 if (issetmem && VECTOR_MODE_P (mode))
23606 mode_value = vec_value;
23607 else
23608 mode_value = value;
23609 gcc_assert (GET_MODE_SIZE (mode) <= size);
23611 /* See if block is big or small, handle small blocks. */
23612 if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size)
23614 int size2 = size;
23615 loop_label = gen_label_rtx ();
23617 if (!*done_label)
23618 *done_label = gen_label_rtx ();
23620 emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count),
23621 1, loop_label);
23622 size2 >>= 1;
23624 /* Handle sizes > 3. */
23625 for (;size2 > 2; size2 >>= 1)
23626 expand_small_movmem_or_setmem (destmem, srcmem,
23627 *destptr, *srcptr,
23628 value, vec_value,
23629 *count,
23630 size2, *done_label, issetmem);
23631 /* Nothing to copy? Jump to DONE_LABEL if so */
23632 emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count),
23633 1, *done_label);
23635 /* Do a byte copy. */
23636 destmem = change_address (destmem, QImode, *destptr);
23637 if (issetmem)
23638 emit_move_insn (destmem, gen_lowpart (QImode, value));
23639 else
23641 srcmem = change_address (srcmem, QImode, *srcptr);
23642 emit_move_insn (destmem, srcmem);
23645 /* Handle sizes 2 and 3. */
23646 label = ix86_expand_aligntest (*count, 2, false);
23647 destmem = change_address (destmem, HImode, *destptr);
23648 destmem = offset_address (destmem, *count, 1);
23649 destmem = offset_address (destmem, GEN_INT (-2), 2);
23650 if (issetmem)
23651 emit_move_insn (destmem, gen_lowpart (HImode, value));
23652 else
23654 srcmem = change_address (srcmem, HImode, *srcptr);
23655 srcmem = offset_address (srcmem, *count, 1);
23656 srcmem = offset_address (srcmem, GEN_INT (-2), 2);
23657 emit_move_insn (destmem, srcmem);
23660 emit_label (label);
23661 LABEL_NUSES (label) = 1;
23662 emit_jump_insn (gen_jump (*done_label));
23663 emit_barrier ();
23665 else
23666 gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size
23667 || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size);
23669 /* Start memcpy for COUNT >= SIZE. */
23670 if (loop_label)
23672 emit_label (loop_label);
23673 LABEL_NUSES (loop_label) = 1;
23676 /* Copy first desired_align bytes. */
23677 if (!issetmem)
23678 srcmem = change_address (srcmem, mode, *srcptr);
23679 destmem = change_address (destmem, mode, *destptr);
23680 modesize = GEN_INT (GET_MODE_SIZE (mode));
23681 for (n = 0; prolog_size < desired_align - align; n++)
23683 if (issetmem)
23684 emit_move_insn (destmem, mode_value);
23685 else
23687 emit_move_insn (destmem, srcmem);
23688 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23690 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23691 prolog_size += GET_MODE_SIZE (mode);
23695 /* Copy last SIZE bytes. */
23696 destmem = offset_address (destmem, *count, 1);
23697 destmem = offset_address (destmem,
23698 GEN_INT (-size - prolog_size),
23700 if (issetmem)
23701 emit_move_insn (destmem, mode_value);
23702 else
23704 srcmem = offset_address (srcmem, *count, 1);
23705 srcmem = offset_address (srcmem,
23706 GEN_INT (-size - prolog_size),
23708 emit_move_insn (destmem, srcmem);
23710 for (n = 1; n * GET_MODE_SIZE (mode) < size; n++)
23712 destmem = offset_address (destmem, modesize, 1);
23713 if (issetmem)
23714 emit_move_insn (destmem, mode_value);
23715 else
23717 srcmem = offset_address (srcmem, modesize, 1);
23718 emit_move_insn (destmem, srcmem);
23722 /* Align destination. */
23723 if (desired_align > 1 && desired_align > align)
23725 rtx saveddest = *destptr;
23727 gcc_assert (desired_align <= size);
23728 /* Align destptr up, place it to new register. */
23729 *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
23730 GEN_INT (prolog_size),
23731 NULL_RTX, 1, OPTAB_DIRECT);
23732 *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
23733 GEN_INT (-desired_align),
23734 *destptr, 1, OPTAB_DIRECT);
23735 /* See how many bytes we skipped. */
23736 saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
23737 *destptr,
23738 saveddest, 1, OPTAB_DIRECT);
23739 /* Adjust srcptr and count. */
23740 if (!issetmem)
23741 *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr, saveddest,
23742 *srcptr, 1, OPTAB_DIRECT);
23743 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
23744 saveddest, *count, 1, OPTAB_DIRECT);
23745 /* We copied at most size + prolog_size. */
23746 if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size))
23747 *min_size = (*min_size - size) & ~(unsigned HOST_WIDE_INT)(size - 1);
23748 else
23749 *min_size = 0;
23751 /* Our loops always round down the block size, but for dispatch to a library
23752 call we need the precise value. */
23753 if (dynamic_check)
23754 *count = expand_simple_binop (GET_MODE (*count), AND, *count,
23755 GEN_INT (-size), *count, 1, OPTAB_DIRECT);
23757 else
23759 gcc_assert (prolog_size == 0);
23760 /* Decrease count, so we won't end up copying last word twice. */
23761 if (!CONST_INT_P (*count))
23762 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
23763 constm1_rtx, *count, 1, OPTAB_DIRECT);
23764 else
23765 *count = GEN_INT ((UINTVAL (*count) - 1) & ~(unsigned HOST_WIDE_INT)(size - 1));
23766 if (*min_size)
23767 *min_size = (*min_size - 1) & ~(unsigned HOST_WIDE_INT)(size - 1);
23772 /* This function is like the previous one, except here we know how many bytes
23773 need to be copied. That allows us to update alignment not only of DST, which
23774 is returned, but also of SRC, which is passed as a pointer for that
23775 reason. */
23776 static rtx
23777 expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
23778 rtx srcreg, rtx value, rtx vec_value,
23779 int desired_align, int align_bytes,
23780 bool issetmem)
23782 rtx src = NULL;
23783 rtx orig_dst = dst;
23784 rtx orig_src = NULL;
23785 int piece_size = 1;
23786 int copied_bytes = 0;
23788 if (!issetmem)
23790 gcc_assert (srcp != NULL);
23791 src = *srcp;
23792 orig_src = src;
23795 for (piece_size = 1;
23796 piece_size <= desired_align && copied_bytes < align_bytes;
23797 piece_size <<= 1)
23799 if (align_bytes & piece_size)
23801 if (issetmem)
23803 if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value)))
23804 dst = emit_memset (dst, destreg, vec_value, piece_size);
23805 else
23806 dst = emit_memset (dst, destreg, value, piece_size);
23808 else
23809 dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
23810 copied_bytes += piece_size;
23813 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
23814 set_mem_align (dst, desired_align * BITS_PER_UNIT);
23815 if (MEM_SIZE_KNOWN_P (orig_dst))
23816 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
23818 if (!issetmem)
23820 int src_align_bytes = get_mem_align_offset (src, desired_align
23821 * BITS_PER_UNIT);
23822 if (src_align_bytes >= 0)
23823 src_align_bytes = desired_align - src_align_bytes;
23824 if (src_align_bytes >= 0)
23826 unsigned int src_align;
23827 for (src_align = desired_align; src_align >= 2; src_align >>= 1)
23829 if ((src_align_bytes & (src_align - 1))
23830 == (align_bytes & (src_align - 1)))
23831 break;
23833 if (src_align > (unsigned int) desired_align)
23834 src_align = desired_align;
23835 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
23836 set_mem_align (src, src_align * BITS_PER_UNIT);
23838 if (MEM_SIZE_KNOWN_P (orig_src))
23839 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
23840 *srcp = src;
23843 return dst;
23846 /* Return true if ALG can be used in current context.
23847 Assume we expand memset if MEMSET is true. */
23848 static bool
23849 alg_usable_p (enum stringop_alg alg, bool memset)
23851 if (alg == no_stringop)
23852 return false;
23853 if (alg == vector_loop)
23854 return TARGET_SSE || TARGET_AVX;
23855 /* Algorithms using the rep prefix want at least edi and ecx;
23856 additionally, memset wants eax and memcpy wants esi. Don't
23857 consider such algorithms if the user has appropriated those
23858 registers for their own purposes. */
23859 if (alg == rep_prefix_1_byte
23860 || alg == rep_prefix_4_byte
23861 || alg == rep_prefix_8_byte)
23862 return !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
23863 || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
23864 return true;
23867 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
23868 static enum stringop_alg
23869 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
23870 unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
23871 bool memset, bool zero_memset, int *dynamic_check, bool *noalign)
23873 const struct stringop_algs * algs;
23874 bool optimize_for_speed;
23875 int max = 0;
23876 const struct processor_costs *cost;
23877 int i;
23878 bool any_alg_usable_p = false;
23880 *noalign = false;
23881 *dynamic_check = -1;
23883 /* Even if the string operation call is cold, we still might spend a lot
23884 of time processing large blocks. */
23885 if (optimize_function_for_size_p (cfun)
23886 || (optimize_insn_for_size_p ()
23887 && (max_size < 256
23888 || (expected_size != -1 && expected_size < 256))))
23889 optimize_for_speed = false;
23890 else
23891 optimize_for_speed = true;
23893 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
23894 if (memset)
23895 algs = &cost->memset[TARGET_64BIT != 0];
23896 else
23897 algs = &cost->memcpy[TARGET_64BIT != 0];
23899 /* See maximal size for user defined algorithm. */
23900 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
23902 enum stringop_alg candidate = algs->size[i].alg;
23903 bool usable = alg_usable_p (candidate, memset);
23904 any_alg_usable_p |= usable;
23906 if (candidate != libcall && candidate && usable)
23907 max = algs->size[i].max;
23910 /* If the expected size is not known but the max size is small enough
23911 so that the inline version is a win, set the expected size into
23912 the range. */
23913 if (((max > 1 && (unsigned HOST_WIDE_INT) max >= max_size) || max == -1)
23914 && expected_size == -1)
23915 expected_size = min_size / 2 + max_size / 2;
23917 /* If the user specified the algorithm, honor it if possible. */
23918 if (ix86_stringop_alg != no_stringop
23919 && alg_usable_p (ix86_stringop_alg, memset))
23920 return ix86_stringop_alg;
23921 /* rep; movq or rep; movl is the smallest variant. */
23922 else if (!optimize_for_speed)
23924 *noalign = true;
23925 if (!count || (count & 3) || (memset && !zero_memset))
23926 return alg_usable_p (rep_prefix_1_byte, memset)
23927 ? rep_prefix_1_byte : loop_1_byte;
23928 else
23929 return alg_usable_p (rep_prefix_4_byte, memset)
23930 ? rep_prefix_4_byte : loop;
23932 /* Very tiny blocks are best handled via the loop, since REP is expensive to
23933 set up. */
23934 else if (expected_size != -1 && expected_size < 4)
23935 return loop_1_byte;
23936 else if (expected_size != -1)
23938 enum stringop_alg alg = libcall;
23939 bool alg_noalign = false;
23940 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
23942 /* We get here if the algorithms that were not libcall-based
23943 were rep-prefix based and we are unable to use rep prefixes
23944 based on global register usage. Break out of the loop and
23945 use the heuristic below. */
23946 if (algs->size[i].max == 0)
23947 break;
23948 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
23950 enum stringop_alg candidate = algs->size[i].alg;
23952 if (candidate != libcall && alg_usable_p (candidate, memset))
23954 alg = candidate;
23955 alg_noalign = algs->size[i].noalign;
23957 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
23958 last non-libcall inline algorithm. */
23959 if (TARGET_INLINE_ALL_STRINGOPS)
23961 /* When the current size is best to be copied by a libcall,
23962 but we are still forced to inline, run the heuristic below
23963 that will pick code for medium sized blocks. */
23964 if (alg != libcall)
23966 *noalign = alg_noalign;
23967 return alg;
23969 break;
23971 else if (alg_usable_p (candidate, memset))
23973 *noalign = algs->size[i].noalign;
23974 return candidate;
23979 /* When asked to inline the call anyway, try to pick a meaningful choice.
23980 We look for the maximal size of block that is faster to copy by hand and
23981 take blocks of at most that size, guessing that the average size will
23982 be roughly half of the block.
23984 If this turns out to be bad, we might simply specify the preferred
23985 choice in ix86_costs. */
23986 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
23987 && (algs->unknown_size == libcall
23988 || !alg_usable_p (algs->unknown_size, memset)))
23990 enum stringop_alg alg;
23992 /* If there aren't any usable algorithms, then recursing on
23993 smaller sizes isn't going to find anything. Just return the
23994 simple byte-at-a-time copy loop. */
23995 if (!any_alg_usable_p)
23997 /* Pick something reasonable. */
23998 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
23999 *dynamic_check = 128;
24000 return loop_1_byte;
24002 if (max <= 0)
24003 max = 4096;
24004 alg = decide_alg (count, max / 2, min_size, max_size, memset,
24005 zero_memset, dynamic_check, noalign);
24006 gcc_assert (*dynamic_check == -1);
24007 gcc_assert (alg != libcall);
24008 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24009 *dynamic_check = max;
24010 return alg;
24012 return (alg_usable_p (algs->unknown_size, memset)
24013 ? algs->unknown_size : libcall);
24016 /* Decide on alignment. We know that the operand is already aligned to ALIGN
24017 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
24018 static int
24019 decide_alignment (int align,
24020 enum stringop_alg alg,
24021 int expected_size,
24022 enum machine_mode move_mode)
24024 int desired_align = 0;
24026 gcc_assert (alg != no_stringop);
24028 if (alg == libcall)
24029 return 0;
24030 if (move_mode == VOIDmode)
24031 return 0;
24033 desired_align = GET_MODE_SIZE (move_mode);
24034 /* PentiumPro has special logic triggering for 8 byte aligned blocks,
24035 copying a whole cache line at once. */
24036 if (TARGET_PENTIUMPRO
24037 && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
24038 desired_align = 8;
24040 if (optimize_size)
24041 desired_align = 1;
24042 if (desired_align < align)
24043 desired_align = align;
24044 if (expected_size != -1 && expected_size < 4)
24045 desired_align = align;
24047 return desired_align;
24051 /* Helper function for memset. For QImode value 0xXY produce
24052 0xXYXYXYXY of the width specified by MODE. This is essentially
24053 a * 0x01010101, but we can do slightly better than
24054 synth_mult by unwinding the sequence by hand on CPUs with
24055 slow multiply. */
24056 static rtx
24057 promote_duplicated_reg (enum machine_mode mode, rtx val)
24059 enum machine_mode valmode = GET_MODE (val);
24060 rtx tmp;
24061 int nops = mode == DImode ? 3 : 2;
24063 gcc_assert (mode == SImode || mode == DImode || val == const0_rtx);
24064 if (val == const0_rtx)
24065 return copy_to_mode_reg (mode, CONST0_RTX (mode));
24066 if (CONST_INT_P (val))
24068 HOST_WIDE_INT v = INTVAL (val) & 255;
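      /* Broadcast the low byte into every byte of the constant:
	 0xXY -> 0xXYXY -> 0xXYXYXYXY (and into the upper half for DImode).  */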
24070 v |= v << 8;
24071 v |= v << 16;
24072 if (mode == DImode)
24073 v |= (v << 16) << 16;
24074 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
24077 if (valmode == VOIDmode)
24078 valmode = QImode;
24079 if (valmode != QImode)
24080 val = gen_lowpart (QImode, val);
24081 if (mode == QImode)
24082 return val;
24083 if (!TARGET_PARTIAL_REG_STALL)
24084 nops--;
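  /* Prefer the multiply by the 0x...01010101 constant when it is cheaper
     than the shift-and-or sequence emitted in the else branch below.  */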
24085 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
24086 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
24087 <= (ix86_cost->shift_const + ix86_cost->add) * nops
24088 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
24090 rtx reg = convert_modes (mode, QImode, val, true);
24091 tmp = promote_duplicated_reg (mode, const1_rtx);
24092 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
24093 OPTAB_DIRECT);
24095 else
24097 rtx reg = convert_modes (mode, QImode, val, true);
24099 if (!TARGET_PARTIAL_REG_STALL)
24100 if (mode == SImode)
24101 emit_insn (gen_movsi_insv_1 (reg, reg));
24102 else
24103 emit_insn (gen_movdi_insv_1 (reg, reg));
24104 else
24106 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
24107 NULL, 1, OPTAB_DIRECT);
24108 reg =
24109 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24111 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
24112 NULL, 1, OPTAB_DIRECT);
24113 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24114 if (mode == SImode)
24115 return reg;
24116 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
24117 NULL, 1, OPTAB_DIRECT);
24118 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24119 return reg;
24123 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
24124 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
24125 alignment from ALIGN to DESIRED_ALIGN. */
24126 static rtx
24127 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
24128 int align)
24130 rtx promoted_val;
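  /* Promote VAL to the widest mode that either the main loop chunks
     (SIZE_NEEDED) or the alignment prologue (DESIRED_ALIGN > ALIGN) will
     actually need.  */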
24132 if (TARGET_64BIT
24133 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
24134 promoted_val = promote_duplicated_reg (DImode, val);
24135 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
24136 promoted_val = promote_duplicated_reg (SImode, val);
24137 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
24138 promoted_val = promote_duplicated_reg (HImode, val);
24139 else
24140 promoted_val = val;
24142 return promoted_val;
24145 /* Expand string move (memcpy) or store (memset) operation. Use i386 string
24146 operations when profitable. The code depends upon architecture, block size
24147 and alignment, but always has one of the following overall structures:
24149 Aligned move sequence:
24151 1) Prologue guard: Conditional that jumps up to epilogues for small
24152 blocks that can be handled by the epilogue alone. This is faster
24153 but also needed for correctness, since the prologue assumes the block
24154 is larger than the desired alignment.
24156 Optional dynamic check for size and libcall for large
24157 blocks is emitted here too, with -minline-stringops-dynamically.
24159 2) Prologue: copy first few bytes in order to get destination
24160 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
24161 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
24162 copied. We emit either a jump tree on power of two sized
24163 blocks, or a byte loop.
24165 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24166 with specified algorithm.
24168 4) Epilogue: code copying tail of the block that is too small to be
24169 handled by main body (or up to size guarded by prologue guard).
24171 Misaligned move sequence
24173 1) misaligned move prologue/epilogue containing:
24174 a) Prologue handling small memory blocks and jumping to done_label
24175 (skipped if blocks are known to be large enough)
24176 b) Single move copying the first DESIRED_ALIGN-ALIGN bytes if alignment is
24177 needed, done by a single possibly misaligned move
24178 (skipped if alignment is not needed)
24179 c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves
24181 2) Zero size guard dispatching to done_label, if needed
24183 3) dispatch to library call, if needed,
24185 4) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24186 with specified algorithm. */
24187 bool
24188 ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
24189 rtx align_exp, rtx expected_align_exp,
24190 rtx expected_size_exp, rtx min_size_exp,
24191 rtx max_size_exp, rtx probable_max_size_exp,
24192 bool issetmem)
24194 rtx destreg;
24195 rtx srcreg = NULL;
24196 rtx_code_label *label = NULL;
24197 rtx tmp;
24198 rtx_code_label *jump_around_label = NULL;
24199 HOST_WIDE_INT align = 1;
24200 unsigned HOST_WIDE_INT count = 0;
24201 HOST_WIDE_INT expected_size = -1;
24202 int size_needed = 0, epilogue_size_needed;
24203 int desired_align = 0, align_bytes = 0;
24204 enum stringop_alg alg;
24205 rtx promoted_val = NULL;
24206 rtx vec_promoted_val = NULL;
24207 bool force_loopy_epilogue = false;
24208 int dynamic_check;
24209 bool need_zero_guard = false;
24210 bool noalign;
24211 enum machine_mode move_mode = VOIDmode;
24212 int unroll_factor = 1;
24213 /* TODO: Once value ranges are available, fill in proper data. */
24214 unsigned HOST_WIDE_INT min_size = 0;
24215 unsigned HOST_WIDE_INT max_size = -1;
24216 unsigned HOST_WIDE_INT probable_max_size = -1;
24217 bool misaligned_prologue_used = false;
24219 if (CONST_INT_P (align_exp))
24220 align = INTVAL (align_exp);
24221 /* i386 can do misaligned access at reasonably increased cost. */
24222 if (CONST_INT_P (expected_align_exp)
24223 && INTVAL (expected_align_exp) > align)
24224 align = INTVAL (expected_align_exp);
24225 /* ALIGN is the minimum of destination and source alignment, but we care here
24226 just about destination alignment. */
24227 else if (!issetmem
24228 && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
24229 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
24231 if (CONST_INT_P (count_exp))
24233 min_size = max_size = probable_max_size = count = expected_size
24234 = INTVAL (count_exp);
24235 /* When COUNT is 0, there is nothing to do. */
24236 if (!count)
24237 return true;
24239 else
24241 if (min_size_exp)
24242 min_size = INTVAL (min_size_exp);
24243 if (max_size_exp)
24244 max_size = INTVAL (max_size_exp);
24245 if (probable_max_size_exp)
24246 probable_max_size = INTVAL (probable_max_size_exp);
24247 if (CONST_INT_P (expected_size_exp))
24248 expected_size = INTVAL (expected_size_exp);
24251 /* Make sure we don't need to care about overflow later on. */
24252 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
24253 return false;
24255 /* Step 0: Decide on preferred algorithm, desired alignment and
24256 size of chunks to be copied by main loop. */
24257 alg = decide_alg (count, expected_size, min_size, probable_max_size,
24258 issetmem,
24259 issetmem && val_exp == const0_rtx,
24260 &dynamic_check, &noalign);
24261 if (alg == libcall)
24262 return false;
24263 gcc_assert (alg != no_stringop);
24265 /* For now the vector version of memset is generated only for memory zeroing,
24266 as creating the promoted vector value is very cheap in this case. */
24267 if (issetmem && alg == vector_loop && val_exp != const0_rtx)
24268 alg = unrolled_loop;
24270 if (!count)
24271 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
24272 destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
24273 if (!issetmem)
24274 srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
24276 unroll_factor = 1;
24277 move_mode = word_mode;
24278 switch (alg)
24280 case libcall:
24281 case no_stringop:
24282 case last_alg:
24283 gcc_unreachable ();
24284 case loop_1_byte:
24285 need_zero_guard = true;
24286 move_mode = QImode;
24287 break;
24288 case loop:
24289 need_zero_guard = true;
24290 break;
24291 case unrolled_loop:
24292 need_zero_guard = true;
24293 unroll_factor = (TARGET_64BIT ? 4 : 2);
24294 break;
24295 case vector_loop:
24296 need_zero_guard = true;
24297 unroll_factor = 4;
24298 /* Find the widest supported mode. */
24299 move_mode = word_mode;
24300 while (optab_handler (mov_optab, GET_MODE_WIDER_MODE (move_mode))
24301 != CODE_FOR_nothing)
24302 move_mode = GET_MODE_WIDER_MODE (move_mode);
24304 /* Find the corresponding vector mode with the same size as MOVE_MODE.
24305 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
24306 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
24308 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
24309 move_mode = mode_for_vector (word_mode, nunits);
24310 if (optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
24311 move_mode = word_mode;
24313 gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
24314 break;
24315 case rep_prefix_8_byte:
24316 move_mode = DImode;
24317 break;
24318 case rep_prefix_4_byte:
24319 move_mode = SImode;
24320 break;
24321 case rep_prefix_1_byte:
24322 move_mode = QImode;
24323 break;
24325 size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
24326 epilogue_size_needed = size_needed;
24328 desired_align = decide_alignment (align, alg, expected_size, move_mode);
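  /* If stringop alignment is disabled or the algorithm does not need it,
     pretend the input is already sufficiently aligned so that no alignment
     prologue is emitted.  */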
24329 if (!TARGET_ALIGN_STRINGOPS || noalign)
24330 align = desired_align;
24332 /* Step 1: Prologue guard. */
24334 /* Alignment code needs count to be in register. */
24335 if (CONST_INT_P (count_exp) && desired_align > align)
24337 if (INTVAL (count_exp) > desired_align
24338 && INTVAL (count_exp) > size_needed)
24340 align_bytes
24341 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
24342 if (align_bytes <= 0)
24343 align_bytes = 0;
24344 else
24345 align_bytes = desired_align - align_bytes;
24347 if (align_bytes == 0)
24348 count_exp = force_reg (counter_mode (count_exp), count_exp);
24350 gcc_assert (desired_align >= 1 && align >= 1);
24352 /* Misaligned move sequences handle both prologue and epilogue at once.
24353 Default code generation results in smaller code for large alignments
24354 and also avoids redundant work when sizes are known precisely. */
24355 misaligned_prologue_used
24356 = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
24357 && MAX (desired_align, epilogue_size_needed) <= 32
24358 && desired_align <= epilogue_size_needed
24359 && ((desired_align > align && !align_bytes)
24360 || (!count && epilogue_size_needed > 1)));
24362 /* Do the cheap promotion to allow better CSE across the
24363 main loop and epilogue (i.e. one load of the big constant in
24364 front of all the code).
24365 For now the misaligned move sequences do not have a fast path
24366 without broadcasting. */
24367 if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
24369 if (alg == vector_loop)
24371 gcc_assert (val_exp == const0_rtx);
24372 vec_promoted_val = promote_duplicated_reg (move_mode, val_exp);
24373 promoted_val = promote_duplicated_reg_to_size (val_exp,
24374 GET_MODE_SIZE (word_mode),
24375 desired_align, align);
24377 else
24379 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24380 desired_align, align);
24383 /* Misaligned move sequences handle both prologues and epilogues at once.
24384 Default code generation results in smaller code for large alignments and
24385 also avoids redundant work when sizes are known precisely. */
24386 if (misaligned_prologue_used)
24388 /* The misaligned move prologue handles small blocks by itself. */
24389 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
24390 (dst, src, &destreg, &srcreg,
24391 move_mode, promoted_val, vec_promoted_val,
24392 &count_exp,
24393 &jump_around_label,
24394 desired_align < align
24395 ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed,
24396 desired_align, align, &min_size, dynamic_check, issetmem);
24397 if (!issetmem)
24398 src = change_address (src, BLKmode, srcreg);
24399 dst = change_address (dst, BLKmode, destreg);
24400 set_mem_align (dst, desired_align * BITS_PER_UNIT);
24401 epilogue_size_needed = 0;
24402 if (need_zero_guard && !min_size)
24404 /* It is possible that we copied enough so the main loop will not
24405 execute. */
24406 gcc_assert (size_needed > 1);
24407 if (jump_around_label == NULL_RTX)
24408 jump_around_label = gen_label_rtx ();
24409 emit_cmp_and_jump_insns (count_exp,
24410 GEN_INT (size_needed),
24411 LTU, 0, counter_mode (count_exp), 1, jump_around_label);
24412 if (expected_size == -1
24413 || expected_size < (desired_align - align) / 2 + size_needed)
24414 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24415 else
24416 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24419 /* Ensure that alignment prologue won't copy past end of block. */
24420 else if (size_needed > 1 || (desired_align > 1 && desired_align > align))
24422 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
24423 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
24424 Make sure it is power of 2. */
24425 epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);
24427 /* To improve performance of small blocks, we jump around the VAL
24428 promotion. This means that if the promoted VAL is not constant,
24429 we might not use it in the epilogue and have to use the byte
24430 loop variant. */
24431 if (issetmem && epilogue_size_needed > 2 && !promoted_val)
24432 force_loopy_epilogue = true;
24433 if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24434 || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24436 /* If main algorithm works on QImode, no epilogue is needed.
24437 For small sizes just don't align anything. */
24438 if (size_needed == 1)
24439 desired_align = align;
24440 else
24441 goto epilogue;
24443 else if (!count
24444 && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24446 label = gen_label_rtx ();
24447 emit_cmp_and_jump_insns (count_exp,
24448 GEN_INT (epilogue_size_needed),
24449 LTU, 0, counter_mode (count_exp), 1, label);
24450 if (expected_size == -1 || expected_size < epilogue_size_needed)
24451 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24452 else
24453 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24457   /* Emit code to decide at runtime whether a library call or inline code
24458      should be used.  */
24459 if (dynamic_check != -1)
24461 if (!issetmem && CONST_INT_P (count_exp))
24463 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
24465 emit_block_move_via_libcall (dst, src, count_exp, false);
24466 count_exp = const0_rtx;
24467 goto epilogue;
24470 else
24472 rtx_code_label *hot_label = gen_label_rtx ();
24473 if (jump_around_label == NULL_RTX)
24474 jump_around_label = gen_label_rtx ();
24475 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
24476 LEU, 0, counter_mode (count_exp),
24477 1, hot_label);
24478 predict_jump (REG_BR_PROB_BASE * 90 / 100);
24479 if (issetmem)
24480 set_storage_via_libcall (dst, count_exp, val_exp, false);
24481 else
24482 emit_block_move_via_libcall (dst, src, count_exp, false);
24483 emit_jump (jump_around_label);
24484 emit_label (hot_label);
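	  /* As an illustration of the runtime check above (a worked example,
	     not from the original sources): with dynamic_check == 128 and an
	     unknown count, blocks of at most 127 bytes take the predicted-hot
	     inline path below, while larger blocks go through the library
	     call that was just emitted.  */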
24488 /* Step 2: Alignment prologue. */
24489 /* Do the expensive promotion once we branched off the small blocks. */
24490 if (issetmem && !promoted_val)
24491 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24492 desired_align, align);
24494 if (desired_align > align && !misaligned_prologue_used)
24496 if (align_bytes == 0)
24498	  /* Except for the first move in the prologue, we no longer know
24499	     the constant offset in the aliasing info.  It doesn't seem worth
24500	     the pain to maintain it for the first move, so throw away
24501	     the info early.  */
24502 dst = change_address (dst, BLKmode, destreg);
24503 if (!issetmem)
24504 src = change_address (src, BLKmode, srcreg);
24505 dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg,
24506 promoted_val, vec_promoted_val,
24507 count_exp, align, desired_align,
24508 issetmem);
24509 /* At most desired_align - align bytes are copied. */
24510 if (min_size < (unsigned)(desired_align - align))
24511 min_size = 0;
24512 else
24513 min_size -= desired_align - align;
24515 else
24517 /* If we know how many bytes need to be stored before dst is
24518 sufficiently aligned, maintain aliasing info accurately. */
24519 dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg,
24520 srcreg,
24521 promoted_val,
24522 vec_promoted_val,
24523 desired_align,
24524 align_bytes,
24525 issetmem);
24527 count_exp = plus_constant (counter_mode (count_exp),
24528 count_exp, -align_bytes);
24529 count -= align_bytes;
24530 min_size -= align_bytes;
24531 max_size -= align_bytes;
24533 if (need_zero_guard
24534 && !min_size
24535 && (count < (unsigned HOST_WIDE_INT) size_needed
24536 || (align_bytes == 0
24537 && count < ((unsigned HOST_WIDE_INT) size_needed
24538 + desired_align - align))))
24540 /* It is possible that we copied enough so the main loop will not
24541 execute. */
24542 gcc_assert (size_needed > 1);
24543 if (label == NULL_RTX)
24544 label = gen_label_rtx ();
24545 emit_cmp_and_jump_insns (count_exp,
24546 GEN_INT (size_needed),
24547 LTU, 0, counter_mode (count_exp), 1, label);
24548 if (expected_size == -1
24549 || expected_size < (desired_align - align) / 2 + size_needed)
24550 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24551 else
24552 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24555 if (label && size_needed == 1)
24557 emit_label (label);
24558 LABEL_NUSES (label) = 1;
24559 label = NULL;
24560 epilogue_size_needed = 1;
24561 if (issetmem)
24562 promoted_val = val_exp;
24564 else if (label == NULL_RTX && !misaligned_prologue_used)
24565 epilogue_size_needed = size_needed;
24567 /* Step 3: Main loop. */
24569 switch (alg)
24571 case libcall:
24572 case no_stringop:
24573 case last_alg:
24574 gcc_unreachable ();
24575 case loop_1_byte:
24576 case loop:
24577 case unrolled_loop:
24578 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val,
24579 count_exp, move_mode, unroll_factor,
24580 expected_size, issetmem);
24581 break;
24582 case vector_loop:
24583 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg,
24584 vec_promoted_val, count_exp, move_mode,
24585 unroll_factor, expected_size, issetmem);
24586 break;
24587 case rep_prefix_8_byte:
24588 case rep_prefix_4_byte:
24589 case rep_prefix_1_byte:
24590 expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val,
24591 val_exp, count_exp, move_mode, issetmem);
24592 break;
24594 /* Adjust properly the offset of src and dest memory for aliasing. */
24595 if (CONST_INT_P (count_exp))
24597 if (!issetmem)
24598 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
24599 (count / size_needed) * size_needed);
24600 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
24601 (count / size_needed) * size_needed);
24603 else
24605 if (!issetmem)
24606 src = change_address (src, BLKmode, srcreg);
24607 dst = change_address (dst, BLKmode, destreg);
24610 /* Step 4: Epilogue to copy the remaining bytes. */
24611 epilogue:
24612 if (label)
24614 /* When the main loop is done, COUNT_EXP might hold original count,
24615 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
24616 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
24617 bytes. Compensate if needed. */
24619 if (size_needed < epilogue_size_needed)
24621 tmp =
24622 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
24623 GEN_INT (size_needed - 1), count_exp, 1,
24624 OPTAB_DIRECT);
24625 if (tmp != count_exp)
24626 emit_move_insn (count_exp, tmp);
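      /* A small worked illustration of the masking above, assuming
	 SIZE_NEEDED is a power of two (the usual case here): with
	 size_needed == 16 and an original count of 100, the main loop handles
	 6 * 16 == 96 bytes and 100 & 15 == 4 bytes are left for the
	 epilogue.  */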
24628 emit_label (label);
24629 LABEL_NUSES (label) = 1;
24632 if (count_exp != const0_rtx && epilogue_size_needed > 1)
24634 if (force_loopy_epilogue)
24635 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
24636 epilogue_size_needed);
24637 else
24639 if (issetmem)
24640 expand_setmem_epilogue (dst, destreg, promoted_val,
24641 vec_promoted_val, count_exp,
24642 epilogue_size_needed);
24643 else
24644 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
24645 epilogue_size_needed);
24648 if (jump_around_label)
24649 emit_label (jump_around_label);
24650 return true;
24654 /* Expand the appropriate insns for doing strlen if not just doing
24655 repnz; scasb
24657 out = result, initialized with the start address
24658 align_rtx = alignment of the address.
24659    scratch = scratch register, initialized with the start address when
24660 not aligned, otherwise undefined
24662 This is just the body. It needs the initializations mentioned above and
24663 some address computing at the end. These things are done in i386.md. */
24665 static void
24666 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
24668 int align;
24669 rtx tmp;
24670 rtx_code_label *align_2_label = NULL;
24671 rtx_code_label *align_3_label = NULL;
24672 rtx_code_label *align_4_label = gen_label_rtx ();
24673 rtx_code_label *end_0_label = gen_label_rtx ();
24674 rtx mem;
24675 rtx tmpreg = gen_reg_rtx (SImode);
24676 rtx scratch = gen_reg_rtx (SImode);
24677 rtx cmp;
24679 align = 0;
24680 if (CONST_INT_P (align_rtx))
24681 align = INTVAL (align_rtx);
24683 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
24685 /* Is there a known alignment and is it less than 4? */
24686 if (align < 4)
24688 rtx scratch1 = gen_reg_rtx (Pmode);
24689 emit_move_insn (scratch1, out);
24690 /* Is there a known alignment and is it not 2? */
24691 if (align != 2)
24693 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
24694 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
24696 /* Leave just the 3 lower bits. */
24697 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
24698 NULL_RTX, 0, OPTAB_WIDEN);
24700 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
24701 Pmode, 1, align_4_label);
24702 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
24703 Pmode, 1, align_2_label);
24704 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
24705 Pmode, 1, align_3_label);
24707 else
24709 /* Since the alignment is 2, we have to check 2 or 0 bytes;
24710	     check if it is aligned to a 4-byte boundary.  */
24712 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
24713 NULL_RTX, 0, OPTAB_WIDEN);
24715 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
24716 Pmode, 1, align_4_label);
24719 mem = change_address (src, QImode, out);
24721 /* Now compare the bytes. */
24723   /* Compare the first n unaligned bytes on a byte-by-byte basis.  */
24724 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
24725 QImode, 1, end_0_label);
24727 /* Increment the address. */
24728 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24730 /* Not needed with an alignment of 2 */
24731 if (align != 2)
24733 emit_label (align_2_label);
24735 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
24736 end_0_label);
24738 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24740 emit_label (align_3_label);
24743 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
24744 end_0_label);
24746 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24749   /* Generate a loop to check 4 bytes at a time.  It is not a good idea to
24750      align this loop; it only makes programs bigger and does not help
24751      speed them up.  */
24752 emit_label (align_4_label);
24754 mem = change_address (src, SImode, out);
24755 emit_move_insn (scratch, mem);
24756 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
24758 /* This formula yields a nonzero result iff one of the bytes is zero.
24759      This saves three branches inside the loop and many cycles.  */
24761 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
24762 emit_insn (gen_one_cmplsi2 (scratch, scratch));
24763 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
24764 emit_insn (gen_andsi3 (tmpreg, tmpreg,
24765 gen_int_mode (0x80808080, SImode)));
24766 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
24767 align_4_label);
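  /* An illustrative trace of the zero-byte test above (not from the original
     sources): with scratch == 0x12340056, which contains a zero byte,
	 scratch - 0x01010101      == 0x1132ff55
	 ~scratch                  == 0xedcbffa9
	 0x1132ff55 & 0xedcbffa9   == 0x0102ff01
	 0x0102ff01 & 0x80808080   == 0x00800000   (nonzero -> a byte was zero)
     whereas a word with no zero byte, e.g. 0x01010101, yields 0 and the loop
     continues.  A standalone C sketch of the same predicate:

	 static int has_zero_byte (unsigned int v)
	 {
	   return ((v - 0x01010101u) & ~v & 0x80808080u) != 0;
	 }
  */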
24769 if (TARGET_CMOVE)
24771 rtx reg = gen_reg_rtx (SImode);
24772 rtx reg2 = gen_reg_rtx (Pmode);
24773 emit_move_insn (reg, tmpreg);
24774 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
24776 /* If zero is not in the first two bytes, move two bytes forward. */
24777 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
24778 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24779 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
24780 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
24781 gen_rtx_IF_THEN_ELSE (SImode, tmp,
24782 reg,
24783 tmpreg)));
24784 /* Emit lea manually to avoid clobbering of flags. */
24785 emit_insn (gen_rtx_SET (SImode, reg2,
24786 gen_rtx_PLUS (Pmode, out, const2_rtx)));
24788 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24789 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
24790 emit_insn (gen_rtx_SET (VOIDmode, out,
24791 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
24792 reg2,
24793 out)));
24795 else
24797 rtx_code_label *end_2_label = gen_label_rtx ();
24798 /* Is zero in the first two bytes? */
24800 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
24801 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24802 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
24803 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
24804 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
24805 pc_rtx);
24806 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
24807 JUMP_LABEL (tmp) = end_2_label;
24809 /* Not in the first two. Move two bytes forward. */
24810 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
24811 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
24813 emit_label (end_2_label);
24817 /* Avoid branch in fixing the byte. */
24818 tmpreg = gen_lowpart (QImode, tmpreg);
24819 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
24820 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
24821 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
24822 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
24824 emit_label (end_0_label);
24827 /* Expand strlen. */
24829 bool
24830 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
24832 rtx addr, scratch1, scratch2, scratch3, scratch4;
24834   /* The generic case of the strlen expander is long.  Avoid
24835      expanding it unless TARGET_INLINE_ALL_STRINGOPS.  */
24837 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
24838 && !TARGET_INLINE_ALL_STRINGOPS
24839 && !optimize_insn_for_size_p ()
24840 && (!CONST_INT_P (align) || INTVAL (align) < 4))
24841 return false;
24843 addr = force_reg (Pmode, XEXP (src, 0));
24844 scratch1 = gen_reg_rtx (Pmode);
24846 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
24847 && !optimize_insn_for_size_p ())
24849       /* Well, it seems that some optimizer does not combine a call like
24850	 foo(strlen(bar), strlen(bar));
24851	 when the move and the subtraction are done here.  It does calculate
24852	 the length just once when these instructions are done inside
24853	 output_strlen_unroll().  But I think since &bar[strlen(bar)] is
24854	 often used and I use one fewer register for the lifetime of
24855	 output_strlen_unroll() this is better.  */
24857 emit_move_insn (out, addr);
24859 ix86_expand_strlensi_unroll_1 (out, src, align);
24861 /* strlensi_unroll_1 returns the address of the zero at the end of
24862 the string, like memchr(), so compute the length by subtracting
24863 the start address. */
24864 emit_insn (ix86_gen_sub3 (out, out, addr));
24866 else
24868 rtx unspec;
24870 /* Can't use this if the user has appropriated eax, ecx, or edi. */
24871 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
24872 return false;
24874 scratch2 = gen_reg_rtx (Pmode);
24875 scratch3 = gen_reg_rtx (Pmode);
24876 scratch4 = force_reg (Pmode, constm1_rtx);
24878 emit_move_insn (scratch3, addr);
24879 eoschar = force_reg (QImode, eoschar);
24881 src = replace_equiv_address_nv (src, scratch3);
24883 /* If .md starts supporting :P, this can be done in .md. */
24884 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
24885 scratch4), UNSPEC_SCAS);
24886 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
24887 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
24888 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
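      /* A sketch of the arithmetic performed by the last two insns, assuming
	 the strlenqi_1 pattern implements the usual repnz scasb idiom and
	 leaves the count register in scratch1: the counter starts at -1 and
	 is decremented once per byte scanned, including the terminating zero,
	 so after a string of length LEN it holds -(LEN + 2).  Then
	     out = ~scratch1 - 1 = -scratch1 - 2 = LEN.  */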
24890 return true;
24893 /* For a given symbol (function), construct code to compute the address of
24894    its PLT entry in the large x86-64 PIC model.  */
24895 static rtx
24896 construct_plt_address (rtx symbol)
24898 rtx tmp, unspec;
24900 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
24901 gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
24902 gcc_assert (Pmode == DImode);
24904 tmp = gen_reg_rtx (Pmode);
24905 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
24907 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
24908 emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
24909 return tmp;
24913 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
24914 rtx callarg2,
24915 rtx pop, bool sibcall)
24917 unsigned int const cregs_size
24918 = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers);
24919 rtx vec[3 + cregs_size];
24920 rtx use = NULL, call;
24921 unsigned int vec_len = 0;
24923 if (pop == const0_rtx)
24924 pop = NULL;
24925 gcc_assert (!TARGET_64BIT || !pop);
24927 if (TARGET_MACHO && !TARGET_64BIT)
24929 #if TARGET_MACHO
24930 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
24931 fnaddr = machopic_indirect_call_target (fnaddr);
24932 #endif
24934 else
24936 /* Static functions and indirect calls don't need the pic register. */
24937 if (flag_pic
24938 && (!TARGET_64BIT
24939 || (ix86_cmodel == CM_LARGE_PIC
24940 && DEFAULT_ABI != MS_ABI))
24941 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
24942 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
24943 use_reg (&use, pic_offset_table_rtx);
24946 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
24948 rtx al = gen_rtx_REG (QImode, AX_REG);
24949 emit_move_insn (al, callarg2);
24950 use_reg (&use, al);
24953 if (ix86_cmodel == CM_LARGE_PIC
24954 && !TARGET_PECOFF
24955 && MEM_P (fnaddr)
24956 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
24957 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
24958 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
24959 else if (sibcall
24960 ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
24961 : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
24963 fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
24964 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
24967 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
24968 if (retval)
24969 call = gen_rtx_SET (VOIDmode, retval, call);
24970 vec[vec_len++] = call;
24972 if (pop)
24974 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
24975 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
24976 vec[vec_len++] = pop;
24979 if (TARGET_64BIT_MS_ABI
24980 && (!callarg2 || INTVAL (callarg2) != -2))
24982 unsigned i;
24984 vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
24985 UNSPEC_MS_TO_SYSV_CALL);
24987 for (i = 0; i < cregs_size; i++)
24989 int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
24990 enum machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;
24992 vec[vec_len++]
24993 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (mode, regno));
24997 if (vec_len > 1)
24998 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
24999 call = emit_call_insn (call);
25000 if (use)
25001 CALL_INSN_FUNCTION_USAGE (call) = use;
25003 return call;
25006 /* Output the assembly for a call instruction. */
25008 const char *
25009 ix86_output_call_insn (rtx_insn *insn, rtx call_op)
25011 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
25012 bool seh_nop_p = false;
25013 const char *xasm;
25015 if (SIBLING_CALL_P (insn))
25017 if (direct_p)
25018 xasm = "jmp\t%P0";
25019 /* SEH epilogue detection requires the indirect branch case
25020 to include REX.W. */
25021 else if (TARGET_SEH)
25022 xasm = "rex.W jmp %A0";
25023 else
25024 xasm = "jmp\t%A0";
25026 output_asm_insn (xasm, &call_op);
25027 return "";
25030 /* SEH unwinding can require an extra nop to be emitted in several
25031 circumstances. Determine if we have one of those. */
25032 if (TARGET_SEH)
25034 rtx_insn *i;
25036 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
25038 /* If we get to another real insn, we don't need the nop. */
25039 if (INSN_P (i))
25040 break;
25042 /* If we get to the epilogue note, prevent a catch region from
25043 being adjacent to the standard epilogue sequence. If non-
25044 call-exceptions, we'll have done this during epilogue emission. */
25045 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
25046 && !flag_non_call_exceptions
25047 && !can_throw_internal (insn))
25049 seh_nop_p = true;
25050 break;
25054 /* If we didn't find a real insn following the call, prevent the
25055 unwinder from looking into the next function. */
25056 if (i == NULL)
25057 seh_nop_p = true;
25060 if (direct_p)
25061 xasm = "call\t%P0";
25062 else
25063 xasm = "call\t%A0";
25065 output_asm_insn (xasm, &call_op);
25067 if (seh_nop_p)
25068 return "nop";
25070 return "";
25073 /* Clear stack slot assignments remembered from previous functions.
25074 This is called from INIT_EXPANDERS once before RTL is emitted for each
25075 function. */
25077 static struct machine_function *
25078 ix86_init_machine_status (void)
25080 struct machine_function *f;
25082 f = ggc_cleared_alloc<machine_function> ();
25083 f->use_fast_prologue_epilogue_nregs = -1;
25084 f->call_abi = ix86_abi;
25086 return f;
25089 /* Return a MEM corresponding to a stack slot with mode MODE.
25090 Allocate a new slot if necessary.
25092 The RTL for a function can have several slots available: N is
25093 which slot to use. */
25096 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
25098 struct stack_local_entry *s;
25100 gcc_assert (n < MAX_386_STACK_LOCALS);
25102 for (s = ix86_stack_locals; s; s = s->next)
25103 if (s->mode == mode && s->n == n)
25104 return validize_mem (copy_rtx (s->rtl));
25106 s = ggc_alloc<stack_local_entry> ();
25107 s->n = n;
25108 s->mode = mode;
25109 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
25111 s->next = ix86_stack_locals;
25112 ix86_stack_locals = s;
25113 return validize_mem (copy_rtx (s->rtl));
25116 static void
25117 ix86_instantiate_decls (void)
25119 struct stack_local_entry *s;
25121 for (s = ix86_stack_locals; s; s = s->next)
25122 if (s->rtl != NULL_RTX)
25123 instantiate_decl_rtl (s->rtl);
25126 /* Check whether x86 address PARTS is a pc-relative address. */
25128 static bool
25129 rip_relative_addr_p (struct ix86_address *parts)
25131 rtx base, index, disp;
25133 base = parts->base;
25134 index = parts->index;
25135 disp = parts->disp;
25137 if (disp && !base && !index)
25139 if (TARGET_64BIT)
25141 rtx symbol = disp;
25143 if (GET_CODE (disp) == CONST)
25144 symbol = XEXP (disp, 0);
25145 if (GET_CODE (symbol) == PLUS
25146 && CONST_INT_P (XEXP (symbol, 1)))
25147 symbol = XEXP (symbol, 0);
25149 if (GET_CODE (symbol) == LABEL_REF
25150 || (GET_CODE (symbol) == SYMBOL_REF
25151 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
25152 || (GET_CODE (symbol) == UNSPEC
25153 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
25154 || XINT (symbol, 1) == UNSPEC_PCREL
25155 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
25156 return true;
25159 return false;
25162 /* Calculate the length of the memory address in the instruction encoding.
25163    Includes the addr32 prefix, but does not include the one-byte modrm,
25164    opcode, or other prefixes.  We never generate an addr32 prefix for the LEA insn.  */
25167 memory_address_length (rtx addr, bool lea)
25169 struct ix86_address parts;
25170 rtx base, index, disp;
25171 int len;
25172 int ok;
25174 if (GET_CODE (addr) == PRE_DEC
25175 || GET_CODE (addr) == POST_INC
25176 || GET_CODE (addr) == PRE_MODIFY
25177 || GET_CODE (addr) == POST_MODIFY)
25178 return 0;
25180 ok = ix86_decompose_address (addr, &parts);
25181 gcc_assert (ok);
25183 len = (parts.seg == SEG_DEFAULT) ? 0 : 1;
25185 /* If this is not LEA instruction, add the length of addr32 prefix. */
25186 if (TARGET_64BIT && !lea
25187 && (SImode_address_operand (addr, VOIDmode)
25188 || (parts.base && GET_MODE (parts.base) == SImode)
25189 || (parts.index && GET_MODE (parts.index) == SImode)))
25190 len++;
25192 base = parts.base;
25193 index = parts.index;
25194 disp = parts.disp;
25196 if (base && GET_CODE (base) == SUBREG)
25197 base = SUBREG_REG (base);
25198 if (index && GET_CODE (index) == SUBREG)
25199 index = SUBREG_REG (index);
25201 gcc_assert (base == NULL_RTX || REG_P (base));
25202 gcc_assert (index == NULL_RTX || REG_P (index));
25204 /* Rule of thumb:
25205 - esp as the base always wants an index,
25206 - ebp as the base always wants a displacement,
25207 - r12 as the base always wants an index,
25208 - r13 as the base always wants a displacement. */
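  /* A few concrete encodings behind the rule of thumb (illustrative, not from
     the original sources):
	 movl (%eax), %ecx   -> 8b 08         one-byte modrm, no SIB, no disp
	 movl (%esp), %ecx   -> 8b 0c 24      esp in r/m selects a SIB byte
	 movl (%ebp), %ecx   -> 8b 4d 00      no disp-less form, so a disp8 of 0
     In 64-bit code r12 and r13 share the low three encoding bits with esp and
     ebp and inherit the same quirks.  */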
25210 /* Register Indirect. */
25211 if (base && !index && !disp)
25213 /* esp (for its index) and ebp (for its displacement) need
25214 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
25215 code. */
25216 if (base == arg_pointer_rtx
25217 || base == frame_pointer_rtx
25218 || REGNO (base) == SP_REG
25219 || REGNO (base) == BP_REG
25220 || REGNO (base) == R12_REG
25221 || REGNO (base) == R13_REG)
25222 len++;
25225 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
25226 is not disp32, but disp32(%rip), so for disp32
25227 SIB byte is needed, unless print_operand_address
25228 optimizes it into disp32(%rip) or (%rip) is implied
25229 by UNSPEC. */
25230 else if (disp && !base && !index)
25232 len += 4;
25233 if (rip_relative_addr_p (&parts))
25234 len++;
25236 else
25238 /* Find the length of the displacement constant. */
25239 if (disp)
25241 if (base && satisfies_constraint_K (disp))
25242 len += 1;
25243 else
25244 len += 4;
25246 /* ebp always wants a displacement. Similarly r13. */
25247 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
25248 len++;
25250 /* An index requires the two-byte modrm form.... */
25251 if (index
25252 /* ...like esp (or r12), which always wants an index. */
25253 || base == arg_pointer_rtx
25254 || base == frame_pointer_rtx
25255 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
25256 len++;
25259 return len;
25262 /* Compute default value for "length_immediate" attribute. When SHORTFORM
25263    is set, expect that the insn has an 8-bit immediate alternative.  */
25265 ix86_attr_length_immediate_default (rtx insn, bool shortform)
25267 int len = 0;
25268 int i;
25269 extract_insn_cached (insn);
25270 for (i = recog_data.n_operands - 1; i >= 0; --i)
25271 if (CONSTANT_P (recog_data.operand[i]))
25273 enum attr_mode mode = get_attr_mode (insn);
25275 gcc_assert (!len);
25276 if (shortform && CONST_INT_P (recog_data.operand[i]))
25278 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
25279 switch (mode)
25281 case MODE_QI:
25282 len = 1;
25283 continue;
25284 case MODE_HI:
25285 ival = trunc_int_for_mode (ival, HImode);
25286 break;
25287 case MODE_SI:
25288 ival = trunc_int_for_mode (ival, SImode);
25289 break;
25290 default:
25291 break;
25293 if (IN_RANGE (ival, -128, 127))
25295 len = 1;
25296 continue;
25299 switch (mode)
25301 case MODE_QI:
25302 len = 1;
25303 break;
25304 case MODE_HI:
25305 len = 2;
25306 break;
25307 case MODE_SI:
25308 len = 4;
25309 break;
25310 /* Immediates for DImode instructions are encoded
25311 as 32bit sign extended values. */
25312 case MODE_DI:
25313 len = 4;
25314 break;
25315 default:
25316 fatal_insn ("unknown insn mode", insn);
25319 return len;
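/* Illustrative readings of the function above (examples, not from the
   original sources): "addl $5, %eax" with SHORTFORM counts one immediate
   byte, since 5 fits in [-128, 127] as a sign-extended imm8; "addl $300,
   %eax" needs the full four bytes; and a DImode immediate is also reported
   as four bytes because it is encoded as a 32-bit sign-extended value.  */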
25322 /* Compute default value for "length_address" attribute. */
25324 ix86_attr_length_address_default (rtx insn)
25326 int i;
25328 if (get_attr_type (insn) == TYPE_LEA)
25330 rtx set = PATTERN (insn), addr;
25332 if (GET_CODE (set) == PARALLEL)
25333 set = XVECEXP (set, 0, 0);
25335 gcc_assert (GET_CODE (set) == SET);
25337 addr = SET_SRC (set);
25339 return memory_address_length (addr, true);
25342 extract_insn_cached (insn);
25343 for (i = recog_data.n_operands - 1; i >= 0; --i)
25344 if (MEM_P (recog_data.operand[i]))
25346 constrain_operands_cached (reload_completed);
25347 if (which_alternative != -1)
25349 const char *constraints = recog_data.constraints[i];
25350 int alt = which_alternative;
25352 while (*constraints == '=' || *constraints == '+')
25353 constraints++;
25354 while (alt-- > 0)
25355 while (*constraints++ != ',')
25357 /* Skip ignored operands. */
25358 if (*constraints == 'X')
25359 continue;
25361 return memory_address_length (XEXP (recog_data.operand[i], 0), false);
25363 return 0;
25366 /* Compute default value for "length_vex" attribute. It includes
25367    the 2- or 3-byte VEX prefix and 1 opcode byte.  */
25370 ix86_attr_length_vex_default (rtx insn, bool has_0f_opcode, bool has_vex_w)
25372 int i;
25374   /* Only the 0f opcode map can use the 2-byte VEX prefix, and the VEX W bit
25375      requires the 3-byte VEX prefix.  */
25376 if (!has_0f_opcode || has_vex_w)
25377 return 3 + 1;
25379   /* We can always use the 2-byte VEX prefix in 32-bit mode.  */
25380 if (!TARGET_64BIT)
25381 return 2 + 1;
25383 extract_insn_cached (insn);
25385 for (i = recog_data.n_operands - 1; i >= 0; --i)
25386 if (REG_P (recog_data.operand[i]))
25388 /* REX.W bit uses 3 byte VEX prefix. */
25389 if (GET_MODE (recog_data.operand[i]) == DImode
25390 && GENERAL_REG_P (recog_data.operand[i]))
25391 return 3 + 1;
25393 else
25395 /* REX.X or REX.B bits use 3 byte VEX prefix. */
25396 if (MEM_P (recog_data.operand[i])
25397 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
25398 return 3 + 1;
25401 return 2 + 1;
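/* An illustrative application of the rules above (examples, not from the
   original sources): in 32-bit code "vaddps %xmm1, %xmm0, %xmm0" (an 0f-map
   insn with VEX.W clear) can use the two-byte c5 prefix, giving 2 + 1 here,
   while anything that needs VEX.W, a non-0f opcode map, or the REX.X/REX.B
   extension bits - e.g. a memory operand addressed through %r8-%r15 in
   64-bit code - must use the three-byte c4 form, giving 3 + 1.  */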
25404 /* Return the maximum number of instructions a cpu can issue. */
25406 static int
25407 ix86_issue_rate (void)
25409 switch (ix86_tune)
25411 case PROCESSOR_PENTIUM:
25412 case PROCESSOR_BONNELL:
25413 case PROCESSOR_SILVERMONT:
25414 case PROCESSOR_INTEL:
25415 case PROCESSOR_K6:
25416 case PROCESSOR_BTVER2:
25417 case PROCESSOR_PENTIUM4:
25418 case PROCESSOR_NOCONA:
25419 return 2;
25421 case PROCESSOR_PENTIUMPRO:
25422 case PROCESSOR_ATHLON:
25423 case PROCESSOR_K8:
25424 case PROCESSOR_AMDFAM10:
25425 case PROCESSOR_GENERIC:
25426 case PROCESSOR_BTVER1:
25427 return 3;
25429 case PROCESSOR_BDVER1:
25430 case PROCESSOR_BDVER2:
25431 case PROCESSOR_BDVER3:
25432 case PROCESSOR_BDVER4:
25433 case PROCESSOR_CORE2:
25434 case PROCESSOR_NEHALEM:
25435 case PROCESSOR_SANDYBRIDGE:
25436 case PROCESSOR_HASWELL:
25437 return 4;
25439 default:
25440 return 1;
25444 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
25445    by DEP_INSN and nothing else set by DEP_INSN.  */
25447 static bool
25448 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
25450 rtx set, set2;
25452 /* Simplify the test for uninteresting insns. */
25453 if (insn_type != TYPE_SETCC
25454 && insn_type != TYPE_ICMOV
25455 && insn_type != TYPE_FCMOV
25456 && insn_type != TYPE_IBR)
25457 return false;
25459 if ((set = single_set (dep_insn)) != 0)
25461 set = SET_DEST (set);
25462 set2 = NULL_RTX;
25464 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
25465 && XVECLEN (PATTERN (dep_insn), 0) == 2
25466 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
25467 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
25469 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
25470       set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
25472 else
25473 return false;
25475 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
25476 return false;
25478 /* This test is true if the dependent insn reads the flags but
25479 not any other potentially set register. */
25480 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
25481 return false;
25483 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
25484 return false;
25486 return true;
25489 /* Return true iff USE_INSN has a memory address with operands set by
25490 SET_INSN. */
25492 bool
25493 ix86_agi_dependent (rtx set_insn, rtx use_insn)
25495 int i;
25496 extract_insn_cached (use_insn);
25497 for (i = recog_data.n_operands - 1; i >= 0; --i)
25498 if (MEM_P (recog_data.operand[i]))
25500 rtx addr = XEXP (recog_data.operand[i], 0);
25501 return modified_in_p (addr, set_insn) != 0;
25503 return false;
25506 /* Helper function for exact_store_load_dependency.
25507 Return true if addr is found in insn. */
25508 static bool
25509 exact_dependency_1 (rtx addr, rtx insn)
25511 enum rtx_code code;
25512 const char *format_ptr;
25513 int i, j;
25515 code = GET_CODE (insn);
25516 switch (code)
25518 case MEM:
25519 if (rtx_equal_p (addr, insn))
25520 return true;
25521 break;
25522 case REG:
25523 CASE_CONST_ANY:
25524 case SYMBOL_REF:
25525 case CODE_LABEL:
25526 case PC:
25527 case CC0:
25528 case EXPR_LIST:
25529 return false;
25530 default:
25531 break;
25534 format_ptr = GET_RTX_FORMAT (code);
25535 for (i = 0; i < GET_RTX_LENGTH (code); i++)
25537 switch (*format_ptr++)
25539 case 'e':
25540 if (exact_dependency_1 (addr, XEXP (insn, i)))
25541 return true;
25542 break;
25543 case 'E':
25544 for (j = 0; j < XVECLEN (insn, i); j++)
25545 if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
25546 return true;
25547 break;
25550 return false;
25553 /* Return true if there exists exact dependency for store & load, i.e.
25554 the same memory address is used in them. */
25555 static bool
25556 exact_store_load_dependency (rtx store, rtx load)
25558 rtx set1, set2;
25560 set1 = single_set (store);
25561 if (!set1)
25562 return false;
25563 if (!MEM_P (SET_DEST (set1)))
25564 return false;
25565 set2 = single_set (load);
25566 if (!set2)
25567 return false;
25568 if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
25569 return true;
25570 return false;
25573 static int
25574 ix86_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
25576 enum attr_type insn_type, dep_insn_type;
25577 enum attr_memory memory;
25578 rtx set, set2;
25579 int dep_insn_code_number;
25581 /* Anti and output dependencies have zero cost on all CPUs. */
25582 if (REG_NOTE_KIND (link) != 0)
25583 return 0;
25585 dep_insn_code_number = recog_memoized (dep_insn);
25587 /* If we can't recognize the insns, we can't really do anything. */
25588 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
25589 return cost;
25591 insn_type = get_attr_type (insn);
25592 dep_insn_type = get_attr_type (dep_insn);
25594 switch (ix86_tune)
25596 case PROCESSOR_PENTIUM:
25597 /* Address Generation Interlock adds a cycle of latency. */
25598 if (insn_type == TYPE_LEA)
25600 rtx addr = PATTERN (insn);
25602 if (GET_CODE (addr) == PARALLEL)
25603 addr = XVECEXP (addr, 0, 0);
25605 gcc_assert (GET_CODE (addr) == SET);
25607 addr = SET_SRC (addr);
25608 if (modified_in_p (addr, dep_insn))
25609 cost += 1;
25611 else if (ix86_agi_dependent (dep_insn, insn))
25612 cost += 1;
25614 /* ??? Compares pair with jump/setcc. */
25615 if (ix86_flags_dependent (insn, dep_insn, insn_type))
25616 cost = 0;
25618 /* Floating point stores require value to be ready one cycle earlier. */
25619 if (insn_type == TYPE_FMOV
25620 && get_attr_memory (insn) == MEMORY_STORE
25621 && !ix86_agi_dependent (dep_insn, insn))
25622 cost += 1;
25623 break;
25625 case PROCESSOR_PENTIUMPRO:
25626 /* INT->FP conversion is expensive. */
25627 if (get_attr_fp_int_src (dep_insn))
25628 cost += 5;
25630 /* There is one cycle extra latency between an FP op and a store. */
25631 if (insn_type == TYPE_FMOV
25632 && (set = single_set (dep_insn)) != NULL_RTX
25633 && (set2 = single_set (insn)) != NULL_RTX
25634 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
25635 && MEM_P (SET_DEST (set2)))
25636 cost += 1;
25638 memory = get_attr_memory (insn);
25640       /* Show the ability of the reorder buffer to hide the latency of a load
25641	 by executing it in parallel with the previous instruction when the
25642	 previous instruction is not needed to compute the address.  */
25643 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25644 && !ix86_agi_dependent (dep_insn, insn))
25646	  /* Claim moves to take one cycle, as the core can issue one load
25647	     at a time and the next load can start a cycle later.  */
25648 if (dep_insn_type == TYPE_IMOV
25649 || dep_insn_type == TYPE_FMOV)
25650 cost = 1;
25651 else if (cost > 1)
25652 cost--;
25654 break;
25656 case PROCESSOR_K6:
25657 /* The esp dependency is resolved before
25658 the instruction is really finished. */
25659 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25660 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25661 return 1;
25663 /* INT->FP conversion is expensive. */
25664 if (get_attr_fp_int_src (dep_insn))
25665 cost += 5;
25667 memory = get_attr_memory (insn);
25669       /* Show the ability of the reorder buffer to hide the latency of a load
25670	 by executing it in parallel with the previous instruction when the
25671	 previous instruction is not needed to compute the address.  */
25672 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25673 && !ix86_agi_dependent (dep_insn, insn))
25675	  /* Claim moves to take one cycle, as the core can issue one load
25676	     at a time and the next load can start a cycle later.  */
25677 if (dep_insn_type == TYPE_IMOV
25678 || dep_insn_type == TYPE_FMOV)
25679 cost = 1;
25680 else if (cost > 2)
25681 cost -= 2;
25682 else
25683 cost = 1;
25685 break;
25687 case PROCESSOR_AMDFAM10:
25688 case PROCESSOR_BDVER1:
25689 case PROCESSOR_BDVER2:
25690 case PROCESSOR_BDVER3:
25691 case PROCESSOR_BDVER4:
25692 case PROCESSOR_BTVER1:
25693 case PROCESSOR_BTVER2:
25694 case PROCESSOR_GENERIC:
25695       /* The stack engine allows push&pop instructions to execute in parallel.  */
25696 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25697 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25698 return 0;
25699 /* FALLTHRU */
25701 case PROCESSOR_ATHLON:
25702 case PROCESSOR_K8:
25703 memory = get_attr_memory (insn);
25705       /* Show the ability of the reorder buffer to hide the latency of a load
25706	 by executing it in parallel with the previous instruction when the
25707	 previous instruction is not needed to compute the address.  */
25708 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25709 && !ix86_agi_dependent (dep_insn, insn))
25711 enum attr_unit unit = get_attr_unit (insn);
25712 int loadcost = 3;
25714 /* Because of the difference between the length of integer and
25715 floating unit pipeline preparation stages, the memory operands
25716 for floating point are cheaper.
25718	     ??? For Athlon the difference is most probably 2.  */
25719 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
25720 loadcost = 3;
25721 else
25722 loadcost = TARGET_ATHLON ? 2 : 0;
25724 if (cost >= loadcost)
25725 cost -= loadcost;
25726 else
25727 cost = 0;
25729 break;
25731 case PROCESSOR_CORE2:
25732 case PROCESSOR_NEHALEM:
25733 case PROCESSOR_SANDYBRIDGE:
25734 case PROCESSOR_HASWELL:
25735       /* The stack engine allows push&pop instructions to execute in parallel.  */
25736 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25737 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25738 return 0;
25740 memory = get_attr_memory (insn);
25742       /* Show the ability of the reorder buffer to hide the latency of a load
25743	 by executing it in parallel with the previous instruction when the
25744	 previous instruction is not needed to compute the address.  */
25745 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25746 && !ix86_agi_dependent (dep_insn, insn))
25748 if (cost >= 4)
25749 cost -= 4;
25750 else
25751 cost = 0;
25753 break;
25755 case PROCESSOR_SILVERMONT:
25756 case PROCESSOR_INTEL:
25757 if (!reload_completed)
25758 return cost;
25760 /* Increase cost of integer loads. */
25761 memory = get_attr_memory (dep_insn);
25762 if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25764 enum attr_unit unit = get_attr_unit (dep_insn);
25765 if (unit == UNIT_INTEGER && cost == 1)
25767 if (memory == MEMORY_LOAD)
25768 cost = 3;
25769 else
25771	      /* Increase the cost of ld/st for short int types only
25772		 because of the store-forwarding issue.  */
25773 rtx set = single_set (dep_insn);
25774 if (set && (GET_MODE (SET_DEST (set)) == QImode
25775 || GET_MODE (SET_DEST (set)) == HImode))
25777		  /* Increase the cost of the store/load insn if an exact
25778		     dependence exists and it is a load insn.  */
25779 enum attr_memory insn_memory = get_attr_memory (insn);
25780 if (insn_memory == MEMORY_LOAD
25781 && exact_store_load_dependency (dep_insn, insn))
25782 cost = 3;
25788 default:
25789 break;
25792 return cost;
25795 /* How many alternative schedules to try. This should be as wide as the
25796 scheduling freedom in the DFA, but no wider. Making this value too
25797    large results in extra work for the scheduler.  */
25799 static int
25800 ia32_multipass_dfa_lookahead (void)
25802 switch (ix86_tune)
25804 case PROCESSOR_PENTIUM:
25805 return 2;
25807 case PROCESSOR_PENTIUMPRO:
25808 case PROCESSOR_K6:
25809 return 1;
25811 case PROCESSOR_BDVER1:
25812 case PROCESSOR_BDVER2:
25813 case PROCESSOR_BDVER3:
25814 case PROCESSOR_BDVER4:
25815 /* We use lookahead value 4 for BD both before and after reload
25816	 schedules.  The plan is to include value 8 for -O3.  */
25817 return 4;
25819 case PROCESSOR_CORE2:
25820 case PROCESSOR_NEHALEM:
25821 case PROCESSOR_SANDYBRIDGE:
25822 case PROCESSOR_HASWELL:
25823 case PROCESSOR_BONNELL:
25824 case PROCESSOR_SILVERMONT:
25825 case PROCESSOR_INTEL:
25826 /* Generally, we want haifa-sched:max_issue() to look ahead as far
25827	 as the number of instructions that can be executed in a cycle, i.e.,
25828 issue_rate. I wonder why tuning for many CPUs does not do this. */
25829 if (reload_completed)
25830 return ix86_issue_rate ();
25831 /* Don't use lookahead for pre-reload schedule to save compile time. */
25832 return 0;
25834 default:
25835 return 0;
25839 /* Return true if target platform supports macro-fusion. */
25841 static bool
25842 ix86_macro_fusion_p ()
25844 return TARGET_FUSE_CMP_AND_BRANCH;
25847 /* Check whether the current microarchitecture supports macro fusion
25848    for the insn pair "CONDGEN + CONDJMP".  Refer to
25849 "Intel Architectures Optimization Reference Manual". */
25851 static bool
25852 ix86_macro_fusion_pair_p (rtx_insn *condgen, rtx_insn *condjmp)
25854 rtx src, dest;
25855 rtx single_set = single_set (condgen);
25856 enum rtx_code ccode;
25857 rtx compare_set = NULL_RTX, test_if, cond;
25858 rtx alu_set = NULL_RTX, addr = NULL_RTX;
25860 if (!any_condjump_p (condjmp))
25861 return false;
25863 if (get_attr_type (condgen) != TYPE_TEST
25864 && get_attr_type (condgen) != TYPE_ICMP
25865 && get_attr_type (condgen) != TYPE_INCDEC
25866 && get_attr_type (condgen) != TYPE_ALU)
25867 return false;
25869 if (single_set == NULL_RTX
25870 && !TARGET_FUSE_ALU_AND_BRANCH)
25871 return false;
25873 if (single_set != NULL_RTX)
25874 compare_set = single_set;
25875 else
25877 int i;
25878 rtx pat = PATTERN (condgen);
25879 for (i = 0; i < XVECLEN (pat, 0); i++)
25880 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
25882 rtx set_src = SET_SRC (XVECEXP (pat, 0, i));
25883 if (GET_CODE (set_src) == COMPARE)
25884 compare_set = XVECEXP (pat, 0, i);
25885 else
25886 alu_set = XVECEXP (pat, 0, i);
25889 if (compare_set == NULL_RTX)
25890 return false;
25891 src = SET_SRC (compare_set);
25892 if (GET_CODE (src) != COMPARE)
25893 return false;
25895 /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not
25896 supported. */
25897 if ((MEM_P (XEXP (src, 0))
25898 && CONST_INT_P (XEXP (src, 1)))
25899 || (MEM_P (XEXP (src, 1))
25900 && CONST_INT_P (XEXP (src, 0))))
25901 return false;
25903 /* No fusion for RIP-relative address. */
25904 if (MEM_P (XEXP (src, 0)))
25905 addr = XEXP (XEXP (src, 0), 0);
25906 else if (MEM_P (XEXP (src, 1)))
25907 addr = XEXP (XEXP (src, 1), 0);
25909 if (addr) {
25910 ix86_address parts;
25911 int ok = ix86_decompose_address (addr, &parts);
25912 gcc_assert (ok);
25914 if (rip_relative_addr_p (&parts))
25915 return false;
25918 test_if = SET_SRC (pc_set (condjmp));
25919 cond = XEXP (test_if, 0);
25920 ccode = GET_CODE (cond);
25921   /* Check whether the conditional jump uses the Sign or Overflow flags.  */
25922 if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
25923 && (ccode == GE
25924 || ccode == GT
25925 || ccode == LE
25926 || ccode == LT))
25927 return false;
25929 /* Return true for TYPE_TEST and TYPE_ICMP. */
25930 if (get_attr_type (condgen) == TYPE_TEST
25931 || get_attr_type (condgen) == TYPE_ICMP)
25932 return true;
25934   /* The following handles the case of macro-fusion for alu + jmp.  */
25935 if (!TARGET_FUSE_ALU_AND_BRANCH || !alu_set)
25936 return false;
25938 /* No fusion for alu op with memory destination operand. */
25939 dest = SET_DEST (alu_set);
25940 if (MEM_P (dest))
25941 return false;
25943 /* Macro-fusion for inc/dec + unsigned conditional jump is not
25944 supported. */
25945 if (get_attr_type (condgen) == TYPE_INCDEC
25946 && (ccode == GEU
25947 || ccode == GTU
25948 || ccode == LEU
25949 || ccode == LTU))
25950 return false;
25952 return true;
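/* A few concrete pairs as an illustration of the checks above (examples,
   not from the original sources): "cmpl %esi, %edi" followed by "je .L1" is
   fusible (TYPE_ICMP with register operands); "cmpl $1, (%rax)" + "jne" is
   rejected by the MEM-IMM check; and "incl %eax" + "jae" is rejected by the
   inc/dec + unsigned-condition check.  */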
25955 /* Try to reorder the ready list to take advantage of Atom pipelined IMUL
25956    execution.  It is applied if
25957    (1) an IMUL instruction is at the top of the list;
25958    (2) there exists exactly one producer of an independent IMUL instruction
25959        in the ready list.
25960    Return the index of the IMUL producer if it was found, and -1 otherwise.  */
25961 static int
25962 do_reorder_for_imul (rtx_insn **ready, int n_ready)
25964 rtx_insn *insn;
25965 rtx set, insn1, insn2;
25966 sd_iterator_def sd_it;
25967 dep_t dep;
25968 int index = -1;
25969 int i;
25971 if (!TARGET_BONNELL)
25972 return index;
25974 /* Check that IMUL instruction is on the top of ready list. */
25975 insn = ready[n_ready - 1];
25976 set = single_set (insn);
25977 if (!set)
25978 return index;
25979 if (!(GET_CODE (SET_SRC (set)) == MULT
25980 && GET_MODE (SET_SRC (set)) == SImode))
25981 return index;
25983 /* Search for producer of independent IMUL instruction. */
25984 for (i = n_ready - 2; i >= 0; i--)
25986 insn = ready[i];
25987 if (!NONDEBUG_INSN_P (insn))
25988 continue;
25989 /* Skip IMUL instruction. */
25990 insn2 = PATTERN (insn);
25991 if (GET_CODE (insn2) == PARALLEL)
25992 insn2 = XVECEXP (insn2, 0, 0);
25993 if (GET_CODE (insn2) == SET
25994 && GET_CODE (SET_SRC (insn2)) == MULT
25995 && GET_MODE (SET_SRC (insn2)) == SImode)
25996 continue;
25998 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
26000 rtx con;
26001 con = DEP_CON (dep);
26002 if (!NONDEBUG_INSN_P (con))
26003 continue;
26004 insn1 = PATTERN (con);
26005 if (GET_CODE (insn1) == PARALLEL)
26006 insn1 = XVECEXP (insn1, 0, 0);
26008 if (GET_CODE (insn1) == SET
26009 && GET_CODE (SET_SRC (insn1)) == MULT
26010 && GET_MODE (SET_SRC (insn1)) == SImode)
26012 sd_iterator_def sd_it1;
26013 dep_t dep1;
26014 /* Check if there is no other dependee for IMUL. */
26015 index = i;
26016 FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
26018 rtx pro;
26019 pro = DEP_PRO (dep1);
26020 if (!NONDEBUG_INSN_P (pro))
26021 continue;
26022 if (pro != insn)
26023 index = -1;
26025 if (index >= 0)
26026 break;
26029 if (index >= 0)
26030 break;
26032 return index;
26035 /* Try to find the best candidate at the top of the ready list if two insns
26036    have the same priority - a candidate is best if its dependees were
26037    scheduled earlier.  Applied for Silvermont only.
26038    Return true if the top 2 insns must be interchanged.  */
26039 static bool
26040 swap_top_of_ready_list (rtx_insn **ready, int n_ready)
26042 rtx_insn *top = ready[n_ready - 1];
26043 rtx_insn *next = ready[n_ready - 2];
26044 rtx set;
26045 sd_iterator_def sd_it;
26046 dep_t dep;
26047 int clock1 = -1;
26048 int clock2 = -1;
26049 #define INSN_TICK(INSN) (HID (INSN)->tick)
26051 if (!TARGET_SILVERMONT && !TARGET_INTEL)
26052 return false;
26054 if (!NONDEBUG_INSN_P (top))
26055 return false;
26056 if (!NONJUMP_INSN_P (top))
26057 return false;
26058 if (!NONDEBUG_INSN_P (next))
26059 return false;
26060 if (!NONJUMP_INSN_P (next))
26061 return false;
26062 set = single_set (top);
26063 if (!set)
26064 return false;
26065 set = single_set (next);
26066 if (!set)
26067 return false;
26069 if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next))
26071 if (INSN_PRIORITY (top) != INSN_PRIORITY (next))
26072 return false;
26073       /* Determine the winner more precisely.  */
26074 FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep)
26076 rtx pro;
26077 pro = DEP_PRO (dep);
26078 if (!NONDEBUG_INSN_P (pro))
26079 continue;
26080 if (INSN_TICK (pro) > clock1)
26081 clock1 = INSN_TICK (pro);
26083 FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep)
26085 rtx pro;
26086 pro = DEP_PRO (dep);
26087 if (!NONDEBUG_INSN_P (pro))
26088 continue;
26089 if (INSN_TICK (pro) > clock2)
26090 clock2 = INSN_TICK (pro);
26093 if (clock1 == clock2)
26095	  /* Determine the winner - a load must win.  */
26096 enum attr_memory memory1, memory2;
26097 memory1 = get_attr_memory (top);
26098 memory2 = get_attr_memory (next);
26099 if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD)
26100 return true;
26102 return (bool) (clock2 < clock1);
26104 return false;
26105 #undef INSN_TICK
26108 /* Perform possible reordering of the ready list for Atom/Silvermont only.
26109    Return the issue rate.  */
26110 static int
26111 ix86_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
26112 int *pn_ready, int clock_var)
26114 int issue_rate = -1;
26115 int n_ready = *pn_ready;
26116 int i;
26117 rtx_insn *insn;
26118 int index = -1;
26120 /* Set up issue rate. */
26121 issue_rate = ix86_issue_rate ();
26123   /* Do reordering for BONNELL/SILVERMONT only.  */
26124 if (!TARGET_BONNELL && !TARGET_SILVERMONT && !TARGET_INTEL)
26125 return issue_rate;
26127 /* Nothing to do if ready list contains only 1 instruction. */
26128 if (n_ready <= 1)
26129 return issue_rate;
26131   /* Do reordering for the post-reload scheduler only.  */
26132 if (!reload_completed)
26133 return issue_rate;
26135 if ((index = do_reorder_for_imul (ready, n_ready)) >= 0)
26137 if (sched_verbose > 1)
26138 fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n",
26139 INSN_UID (ready[index]));
26141 /* Put IMUL producer (ready[index]) at the top of ready list. */
26142 insn = ready[index];
26143 for (i = index; i < n_ready - 1; i++)
26144 ready[i] = ready[i + 1];
26145 ready[n_ready - 1] = insn;
26146 return issue_rate;
26148 if (clock_var != 0 && swap_top_of_ready_list (ready, n_ready))
26150 if (sched_verbose > 1)
26151 fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n",
26152 INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2]));
26153 /* Swap 2 top elements of ready list. */
26154 insn = ready[n_ready - 1];
26155 ready[n_ready - 1] = ready[n_ready - 2];
26156 ready[n_ready - 2] = insn;
26158 return issue_rate;
26161 static bool
26162 ix86_class_likely_spilled_p (reg_class_t);
26164 /* Return true if the lhs of the insn is a HW function argument register, and
26165    set is_spilled to true if it is a likely-spilled HW register.  */
26166 static bool
26167 insn_is_function_arg (rtx insn, bool* is_spilled)
26169 rtx dst;
26171 if (!NONDEBUG_INSN_P (insn))
26172 return false;
26173   /* Call instructions are not movable, ignore them.  */
26174 if (CALL_P (insn))
26175 return false;
26176 insn = PATTERN (insn);
26177 if (GET_CODE (insn) == PARALLEL)
26178 insn = XVECEXP (insn, 0, 0);
26179 if (GET_CODE (insn) != SET)
26180 return false;
26181 dst = SET_DEST (insn);
26182 if (REG_P (dst) && HARD_REGISTER_P (dst)
26183 && ix86_function_arg_regno_p (REGNO (dst)))
26185 /* Is it likely spilled HW register? */
26186 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
26187 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
26188 *is_spilled = true;
26189 return true;
26191 return false;
26194 /* Add output dependencies for a chain of adjacent function arguments, but
26195    only if there is a move to a likely-spilled HW register.  Return the first
26196    argument if at least one dependence was added, or NULL otherwise.  */
26197 static rtx_insn *
26198 add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
26200 rtx_insn *insn;
26201 rtx_insn *last = call;
26202 rtx_insn *first_arg = NULL;
26203 bool is_spilled = false;
26205 head = PREV_INSN (head);
26207   /* Find the argument-passing instruction nearest to the call.  */
26208 while (true)
26210 last = PREV_INSN (last);
26211 if (last == head)
26212 return NULL;
26213 if (!NONDEBUG_INSN_P (last))
26214 continue;
26215 if (insn_is_function_arg (last, &is_spilled))
26216 break;
26217 return NULL;
26220 first_arg = last;
26221 while (true)
26223 insn = PREV_INSN (last);
26224 if (!INSN_P (insn))
26225 break;
26226 if (insn == head)
26227 break;
26228 if (!NONDEBUG_INSN_P (insn))
26230 last = insn;
26231 continue;
26233 if (insn_is_function_arg (insn, &is_spilled))
26235	  /* Add an output dependence between two function arguments if the chain
26236	     of output arguments contains likely-spilled HW registers.  */
26237 if (is_spilled)
26238 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26239 first_arg = last = insn;
26241 else
26242 break;
26244 if (!is_spilled)
26245 return NULL;
26246 return first_arg;
26249 /* Add output or anti dependency from insn to first_arg to restrict its code
26250 motion. */
26251 static void
26252 avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
26254 rtx set;
26255 rtx tmp;
26257 set = single_set (insn);
26258 if (!set)
26259 return;
26260 tmp = SET_DEST (set);
26261 if (REG_P (tmp))
26263 /* Add output dependency to the first function argument. */
26264 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26265 return;
26267 /* Add anti dependency. */
26268 add_dependence (first_arg, insn, REG_DEP_ANTI);
26271 /* Avoid cross-block motion of a function argument by adding a dependency
26272    from the first non-jump instruction in bb.  */
26273 static void
26274 add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
26276 rtx_insn *insn = BB_END (bb);
26278 while (insn)
26280 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
26282 rtx set = single_set (insn);
26283 if (set)
26285 avoid_func_arg_motion (arg, insn);
26286 return;
26289 if (insn == BB_HEAD (bb))
26290 return;
26291 insn = PREV_INSN (insn);
26295 /* Hook for pre-reload schedule - avoid motion of function arguments
26296 passed in likely spilled HW registers. */
26297 static void
26298 ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
26300 rtx_insn *insn;
26301 rtx_insn *first_arg = NULL;
26302 if (reload_completed)
26303 return;
26304 while (head != tail && DEBUG_INSN_P (head))
26305 head = NEXT_INSN (head);
26306 for (insn = tail; insn != head; insn = PREV_INSN (insn))
26307 if (INSN_P (insn) && CALL_P (insn))
26309 first_arg = add_parameter_dependencies (insn, head);
26310 if (first_arg)
26312	  /* Add a dependee for the first argument to predecessors, but only if
26313	     the region contains more than one block.  */
26314 basic_block bb = BLOCK_FOR_INSN (insn);
26315 int rgn = CONTAINING_RGN (bb->index);
26316 int nr_blks = RGN_NR_BLOCKS (rgn);
26317 /* Skip trivial regions and region head blocks that can have
26318 predecessors outside of region. */
26319 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
26321 edge e;
26322 edge_iterator ei;
26324 /* Regions are SCCs with the exception of selective
26325 scheduling with pipelining of outer blocks enabled.
26326 So also check that immediate predecessors of a non-head
26327 block are in the same region. */
26328 FOR_EACH_EDGE (e, ei, bb->preds)
26330		/* Avoid creating loop-carried dependencies by
26331		   using the topological ordering in the region.  */
26332 if (rgn == CONTAINING_RGN (e->src->index)
26333 && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
26334 add_dependee_for_func_arg (first_arg, e->src);
26337 insn = first_arg;
26338 if (insn == head)
26339 break;
26342 else if (first_arg)
26343 avoid_func_arg_motion (first_arg, insn);
26346 /* Hook for the pre-reload scheduler - set the priority of moves from likely
26347    spilled HW registers to maximum, to schedule them as soon as possible.
26348    These are moves from function argument registers at the top of the function
26349    entry and moves from function return value registers after a call.  */
26350 static int
26351 ix86_adjust_priority (rtx_insn *insn, int priority)
26353 rtx set;
26355 if (reload_completed)
26356 return priority;
26358 if (!NONDEBUG_INSN_P (insn))
26359 return priority;
26361 set = single_set (insn);
26362 if (set)
26364 rtx tmp = SET_SRC (set);
26365 if (REG_P (tmp)
26366 && HARD_REGISTER_P (tmp)
26367 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
26368 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
26369 return current_sched_info->sched_max_insns_priority;
26372 return priority;
26375 /* Model decoder of Core 2/i7.
26376    The hooks below for multipass scheduling (see haifa-sched.c:max_issue)
26377 track the instruction fetch block boundaries and make sure that long
26378 (9+ bytes) instructions are assigned to D0. */
26380 /* Maximum length of an insn that can be handled by
26381 a secondary decoder unit. '8' for Core 2/i7. */
26382 static int core2i7_secondary_decoder_max_insn_size;
26384 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
26385 '16' for Core 2/i7. */
26386 static int core2i7_ifetch_block_size;
26388 /* Maximum number of instructions decoder can handle per cycle.
26389 '6' for Core 2/i7. */
26390 static int core2i7_ifetch_block_max_insns;
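/* As an illustration of how these limits interact (using the Core 2/i7
   values quoted above, not part of the original sources): with a 16-byte
   ifetch block, at most 6 insns per cycle and an 8-byte limit for the
   secondary decoders, a 9-byte insn can only be taken as the first insn of
   a cycle, and any insn whose length would push the running ifetch_block_len
   past 16 bytes is masked out of ready_try by the filter below.  */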
26392 typedef struct ix86_first_cycle_multipass_data_ *
26393 ix86_first_cycle_multipass_data_t;
26394 typedef const struct ix86_first_cycle_multipass_data_ *
26395 const_ix86_first_cycle_multipass_data_t;
26397 /* A variable to store target state across calls to max_issue within
26398 one cycle. */
26399 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
26400 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
26402 /* Initialize DATA. */
26403 static void
26404 core2i7_first_cycle_multipass_init (void *_data)
26406 ix86_first_cycle_multipass_data_t data
26407 = (ix86_first_cycle_multipass_data_t) _data;
26409 data->ifetch_block_len = 0;
26410 data->ifetch_block_n_insns = 0;
26411 data->ready_try_change = NULL;
26412 data->ready_try_change_size = 0;
26415 /* Advancing the cycle; reset ifetch block counts. */
26416 static void
26417 core2i7_dfa_post_advance_cycle (void)
26419 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
26421 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
26423 data->ifetch_block_len = 0;
26424 data->ifetch_block_n_insns = 0;
26427 static int min_insn_size (rtx);
26429 /* Filter out insns from ready_try that the core will not be able to issue
26430 on current cycle due to decoder. */
26431 static void
26432 core2i7_first_cycle_multipass_filter_ready_try
26433 (const_ix86_first_cycle_multipass_data_t data,
26434 signed char *ready_try, int n_ready, bool first_cycle_insn_p)
26436 while (n_ready--)
26438 rtx insn;
26439 int insn_size;
26441 if (ready_try[n_ready])
26442 continue;
26444 insn = get_ready_element (n_ready);
26445 insn_size = min_insn_size (insn);
26447 if (/* If this insn is too long for a secondary decoder ... */
26448 (!first_cycle_insn_p
26449 && insn_size > core2i7_secondary_decoder_max_insn_size)
26450 /* ... or it would not fit into the ifetch block ... */
26451 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
26452 /* ... or the decoder is full already ... */
26453 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
26454 /* ... mask the insn out. */
26456 ready_try[n_ready] = 1;
26458 if (data->ready_try_change)
26459 bitmap_set_bit (data->ready_try_change, n_ready);
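/* For example, once a 7-byte and a 6-byte insn have been issued in the
   current 16-byte ifetch block, only 3 bytes remain, so the filter above
   masks out any remaining candidate of 4 bytes or more for this cycle,
   even though fewer than 6 insns have been decoded.  */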
26464 /* Prepare for a new round of multipass lookahead scheduling. */
26465 static void
26466 core2i7_first_cycle_multipass_begin (void *_data,
26467 signed char *ready_try, int n_ready,
26468 bool first_cycle_insn_p)
26470 ix86_first_cycle_multipass_data_t data
26471 = (ix86_first_cycle_multipass_data_t) _data;
26472 const_ix86_first_cycle_multipass_data_t prev_data
26473 = ix86_first_cycle_multipass_data;
26475 /* Restore the state from the end of the previous round. */
26476 data->ifetch_block_len = prev_data->ifetch_block_len;
26477 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
26479 /* Filter instructions that cannot be issued on current cycle due to
26480 decoder restrictions. */
26481 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
26482 first_cycle_insn_p);
26485 /* INSN is being issued in current solution. Account for its impact on
26486 the decoder model. */
26487 static void
26488 core2i7_first_cycle_multipass_issue (void *_data,
26489 signed char *ready_try, int n_ready,
26490 rtx_insn *insn, const void *_prev_data)
26492 ix86_first_cycle_multipass_data_t data
26493 = (ix86_first_cycle_multipass_data_t) _data;
26494 const_ix86_first_cycle_multipass_data_t prev_data
26495 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
26497 int insn_size = min_insn_size (insn);
26499 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
26500 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
26501 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
26502 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
26504 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
26505 if (!data->ready_try_change)
26507 data->ready_try_change = sbitmap_alloc (n_ready);
26508 data->ready_try_change_size = n_ready;
26510 else if (data->ready_try_change_size < n_ready)
26512 data->ready_try_change = sbitmap_resize (data->ready_try_change,
26513 n_ready, 0);
26514 data->ready_try_change_size = n_ready;
26516 bitmap_clear (data->ready_try_change);
26518 /* Filter out insns from ready_try that the core will not be able to issue
26519 on the current cycle due to decoder restrictions. */
26520 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
26521 false);
26524 /* Revert the effect on ready_try. */
26525 static void
26526 core2i7_first_cycle_multipass_backtrack (const void *_data,
26527 signed char *ready_try,
26528 int n_ready ATTRIBUTE_UNUSED)
26530 const_ix86_first_cycle_multipass_data_t data
26531 = (const_ix86_first_cycle_multipass_data_t) _data;
26532 unsigned int i = 0;
26533 sbitmap_iterator sbi;
26535 gcc_assert (bitmap_last_set_bit (data->ready_try_change) < n_ready);
26536 EXECUTE_IF_SET_IN_BITMAP (data->ready_try_change, 0, i, sbi)
26538 ready_try[i] = 0;
26542 /* Save the result of multipass lookahead scheduling for the next round. */
26543 static void
26544 core2i7_first_cycle_multipass_end (const void *_data)
26546 const_ix86_first_cycle_multipass_data_t data
26547 = (const_ix86_first_cycle_multipass_data_t) _data;
26548 ix86_first_cycle_multipass_data_t next_data
26549 = ix86_first_cycle_multipass_data;
26551 if (data != NULL)
26553 next_data->ifetch_block_len = data->ifetch_block_len;
26554 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
26558 /* Deallocate target data. */
26559 static void
26560 core2i7_first_cycle_multipass_fini (void *_data)
26562 ix86_first_cycle_multipass_data_t data
26563 = (ix86_first_cycle_multipass_data_t) _data;
26565 if (data->ready_try_change)
26567 sbitmap_free (data->ready_try_change);
26568 data->ready_try_change = NULL;
26569 data->ready_try_change_size = 0;
26573 /* Prepare for scheduling pass. */
26574 static void
26575 ix86_sched_init_global (FILE *, int, int)
26577 /* Install scheduling hooks for current CPU. Some of these hooks are used
26578 in time-critical parts of the scheduler, so we only set them up when
26579 they are actually used. */
26580 switch (ix86_tune)
26582 case PROCESSOR_CORE2:
26583 case PROCESSOR_NEHALEM:
26584 case PROCESSOR_SANDYBRIDGE:
26585 case PROCESSOR_HASWELL:
26586 /* Do not perform multipass scheduling for the pre-reload schedule,
26587 to save compile time. */
26588 if (reload_completed)
26590 targetm.sched.dfa_post_advance_cycle
26591 = core2i7_dfa_post_advance_cycle;
26592 targetm.sched.first_cycle_multipass_init
26593 = core2i7_first_cycle_multipass_init;
26594 targetm.sched.first_cycle_multipass_begin
26595 = core2i7_first_cycle_multipass_begin;
26596 targetm.sched.first_cycle_multipass_issue
26597 = core2i7_first_cycle_multipass_issue;
26598 targetm.sched.first_cycle_multipass_backtrack
26599 = core2i7_first_cycle_multipass_backtrack;
26600 targetm.sched.first_cycle_multipass_end
26601 = core2i7_first_cycle_multipass_end;
26602 targetm.sched.first_cycle_multipass_fini
26603 = core2i7_first_cycle_multipass_fini;
26605 /* Set decoder parameters. */
26606 core2i7_secondary_decoder_max_insn_size = 8;
26607 core2i7_ifetch_block_size = 16;
26608 core2i7_ifetch_block_max_insns = 6;
26609 break;
26611 /* ... Fall through ... */
26612 default:
26613 targetm.sched.dfa_post_advance_cycle = NULL;
26614 targetm.sched.first_cycle_multipass_init = NULL;
26615 targetm.sched.first_cycle_multipass_begin = NULL;
26616 targetm.sched.first_cycle_multipass_issue = NULL;
26617 targetm.sched.first_cycle_multipass_backtrack = NULL;
26618 targetm.sched.first_cycle_multipass_end = NULL;
26619 targetm.sched.first_cycle_multipass_fini = NULL;
26620 break;
26625 /* Compute the alignment given to a constant that is being placed in memory.
26626 EXP is the constant and ALIGN is the alignment that the object would
26627 ordinarily have.
26628 The value of this function is used instead of that alignment to align
26629 the object. */
26631 int
26632 ix86_constant_alignment (tree exp, int align)
26634 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
26635 || TREE_CODE (exp) == INTEGER_CST)
26637 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
26638 return 64;
26639 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
26640 return 128;
26642 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
26643 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
26644 return BITS_PER_WORD;
26646 return align;
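/* For example, a DFmode constant that would ordinarily get 32-bit
   alignment is placed on a 64-bit boundary, and, unless optimizing for
   size, a string constant of 31 or more bytes is aligned to
   BITS_PER_WORD.  */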
26649 /* Compute the alignment for a static variable.
26650 TYPE is the data type, and ALIGN is the alignment that
26651 the object would ordinarily have. The value of this function is used
26652 instead of that alignment to align the object. */
26654 int
26655 ix86_data_alignment (tree type, int align, bool opt)
26657 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
26658 for symbols from other compilation units or symbols that don't need
26659 to bind locally. In order to preserve some ABI compatibility with
26660 those compilers, ensure we don't decrease alignment from what we
26661 used to assume. */
26663 int max_align_compat
26664 = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
26666 /* A data structure equal to or greater than the size of a cache line
26667 (64 bytes in the Pentium 4 and other recent Intel processors, including
26668 processors based on the Intel Core microarchitecture) should be aligned
26669 so that its base address is a multiple of the cache line size. */
26671 int max_align
26672 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
26674 if (max_align < BITS_PER_WORD)
26675 max_align = BITS_PER_WORD;
26677 if (opt
26678 && AGGREGATE_TYPE_P (type)
26679 && TYPE_SIZE (type)
26680 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
26682 if (wi::geu_p (TYPE_SIZE (type), max_align_compat)
26683 && align < max_align_compat)
26684 align = max_align_compat;
26685 if (wi::geu_p (TYPE_SIZE (type), max_align)
26686 && align < max_align)
26687 align = max_align;
26690 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
26691 to a 16-byte boundary. */
26692 if (TARGET_64BIT)
26694 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
26695 && TYPE_SIZE (type)
26696 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
26697 && wi::geu_p (TYPE_SIZE (type), 128)
26698 && align < 128)
26699 return 128;
26702 if (!opt)
26703 return align;
26705 if (TREE_CODE (type) == ARRAY_TYPE)
26707 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
26708 return 64;
26709 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
26710 return 128;
26712 else if (TREE_CODE (type) == COMPLEX_TYPE)
26715 if (TYPE_MODE (type) == DCmode && align < 64)
26716 return 64;
26717 if ((TYPE_MODE (type) == XCmode
26718 || TYPE_MODE (type) == TCmode) && align < 128)
26719 return 128;
26721 else if ((TREE_CODE (type) == RECORD_TYPE
26722 || TREE_CODE (type) == UNION_TYPE
26723 || TREE_CODE (type) == QUAL_UNION_TYPE)
26724 && TYPE_FIELDS (type))
26726 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
26727 return 64;
26728 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
26729 return 128;
26731 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
26732 || TREE_CODE (type) == INTEGER_TYPE)
26734 if (TYPE_MODE (type) == DFmode && align < 64)
26735 return 64;
26736 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
26737 return 128;
26740 return align;
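/* For example, when OPT is set and we are not optimizing for size, a
   32-byte aggregate gets the 256-bit GCC 4.8 compatibility alignment,
   an aggregate of a full cache line or more (with a 64-byte prefetch
   block, 512 bits) is cache-line aligned, and on x86-64 any array of
   16 bytes or more ends up at least 128-bit aligned.  */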
26743 /* Compute the alignment for a local variable or a stack slot. EXP is
26744 the data type or decl itself, MODE is the widest mode available and
26745 ALIGN is the alignment that the object would ordinarily have. The
26746 value of this macro is used instead of that alignment to align the
26747 object. */
26749 unsigned int
26750 ix86_local_alignment (tree exp, enum machine_mode mode,
26751 unsigned int align)
26753 tree type, decl;
26755 if (exp && DECL_P (exp))
26757 type = TREE_TYPE (exp);
26758 decl = exp;
26760 else
26762 type = exp;
26763 decl = NULL;
26766 /* Don't do dynamic stack realignment for long long objects with
26767 -mpreferred-stack-boundary=2. */
26768 if (!TARGET_64BIT
26769 && align == 64
26770 && ix86_preferred_stack_boundary < 64
26771 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
26772 && (!type || !TYPE_USER_ALIGN (type))
26773 && (!decl || !DECL_USER_ALIGN (decl)))
26774 align = 32;
26776 /* If TYPE is NULL, we are allocating a stack slot for a caller-save
26777 register in MODE. We will return the largest alignment of XFmode
26778 and DFmode. */
26779 if (!type)
26781 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
26782 align = GET_MODE_ALIGNMENT (DFmode);
26783 return align;
26786 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
26787 to a 16-byte boundary. The exact wording is:
26789 An array uses the same alignment as its elements, except that a local or
26790 global array variable of length at least 16 bytes or
26791 a C99 variable-length array variable always has alignment of at least 16 bytes.
26793 This was added to allow use of aligned SSE instructions on arrays. The
26794 rule is meant for static storage (where the compiler cannot do the analysis
26795 by itself). We follow it for automatic variables only when convenient:
26796 we fully control everything in the function being compiled, and functions
26797 from other units cannot rely on the alignment.
26799 Exclude the va_list type. It is the common case of a local array where
26800 we cannot benefit from the alignment.
26802 TODO: Probably one should optimize for size only when the variable does not escape. */
26803 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
26804 && TARGET_SSE)
26806 if (AGGREGATE_TYPE_P (type)
26807 && (va_list_type_node == NULL_TREE
26808 || (TYPE_MAIN_VARIANT (type)
26809 != TYPE_MAIN_VARIANT (va_list_type_node)))
26810 && TYPE_SIZE (type)
26811 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
26812 && wi::geu_p (TYPE_SIZE (type), 16)
26813 && align < 128)
26814 return 128;
26816 if (TREE_CODE (type) == ARRAY_TYPE)
26818 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
26819 return 64;
26820 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
26821 return 128;
26823 else if (TREE_CODE (type) == COMPLEX_TYPE)
26825 if (TYPE_MODE (type) == DCmode && align < 64)
26826 return 64;
26827 if ((TYPE_MODE (type) == XCmode
26828 || TYPE_MODE (type) == TCmode) && align < 128)
26829 return 128;
26831 else if ((TREE_CODE (type) == RECORD_TYPE
26832 || TREE_CODE (type) == UNION_TYPE
26833 || TREE_CODE (type) == QUAL_UNION_TYPE)
26834 && TYPE_FIELDS (type))
26836 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
26837 return 64;
26838 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
26839 return 128;
26841 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
26842 || TREE_CODE (type) == INTEGER_TYPE)
26845 if (TYPE_MODE (type) == DFmode && align < 64)
26846 return 64;
26847 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
26848 return 128;
26850 return align;
26853 /* Compute the minimum required alignment for dynamic stack realignment
26854 purposes for a local variable, parameter or a stack slot. EXP is
26855 the data type or decl itself, MODE is its mode and ALIGN is the
26856 alignment that the object would ordinarily have. */
26858 unsigned int
26859 ix86_minimum_alignment (tree exp, enum machine_mode mode,
26860 unsigned int align)
26862 tree type, decl;
26864 if (exp && DECL_P (exp))
26866 type = TREE_TYPE (exp);
26867 decl = exp;
26869 else
26871 type = exp;
26872 decl = NULL;
26875 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
26876 return align;
26878 /* Don't do dynamic stack realignment for long long objects with
26879 -mpreferred-stack-boundary=2. */
26880 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
26881 && (!type || !TYPE_USER_ALIGN (type))
26882 && (!decl || !DECL_USER_ALIGN (decl)))
26883 return 32;
26885 return align;
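/* For example, with -m32 -mpreferred-stack-boundary=2 a long long local
   that is not explicitly user-aligned only requires 32-bit alignment
   here, so it does not by itself force dynamic stack realignment.  */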
26888 /* Find a location for the static chain incoming to a nested function.
26889 This is a register, unless all free registers are used by arguments. */
26891 static rtx
26892 ix86_static_chain (const_tree fndecl, bool incoming_p)
26894 unsigned regno;
26896 if (!DECL_STATIC_CHAIN (fndecl))
26897 return NULL;
26899 if (TARGET_64BIT)
26901 /* We always use R10 in 64-bit mode. */
26902 regno = R10_REG;
26904 else
26906 tree fntype;
26907 unsigned int ccvt;
26909 /* By default in 32-bit mode we use ECX to pass the static chain. */
26910 regno = CX_REG;
26912 fntype = TREE_TYPE (fndecl);
26913 ccvt = ix86_get_callcvt (fntype);
26914 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
26916 /* Fastcall functions use ecx/edx for arguments, which leaves
26917 us with EAX for the static chain.
26918 Thiscall functions use ecx for arguments, which also
26919 leaves us with EAX for the static chain. */
26920 regno = AX_REG;
26922 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
26924 /* Thiscall functions use ecx for arguments, which leaves
26925 us with EAX and EDX for the static chain.
26926 For ABI compatibility we use EAX. */
26927 regno = AX_REG;
26929 else if (ix86_function_regparm (fntype, fndecl) == 3)
26931 /* For regparm 3, we have no free call-clobbered registers in
26932 which to store the static chain. In order to implement this,
26933 we have the trampoline push the static chain to the stack.
26934 However, we can't push a value below the return address when
26935 we call the nested function directly, so we have to use an
26936 alternate entry point. For this we use ESI, and have the
26937 alternate entry point push ESI, so that things appear the
26938 same once we're executing the nested function. */
26939 if (incoming_p)
26941 if (fndecl == current_function_decl)
26942 ix86_static_chain_on_stack = true;
26943 return gen_frame_mem (SImode,
26944 plus_constant (Pmode,
26945 arg_pointer_rtx, -8));
26947 regno = SI_REG;
26951 return gen_rtx_REG (Pmode, regno);
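/* To summarize: r10 in 64-bit mode; ecx for the default 32-bit
   conventions; eax for fastcall and thiscall; and for regparm(3) the
   chain ends up in the stack slot at arg_pointer - 8, with esi and an
   alternate entry point used for direct calls as described above.  */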
26954 /* Emit RTL insns to initialize the variable parts of a trampoline.
26955 FNDECL is the decl of the target address; M_TRAMP is a MEM for
26956 the trampoline, and CHAIN_VALUE is an RTX for the static chain
26957 to be passed to the target function. */
26959 static void
26960 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
26962 rtx mem, fnaddr;
26963 int opcode;
26964 int offset = 0;
26966 fnaddr = XEXP (DECL_RTL (fndecl), 0);
26968 if (TARGET_64BIT)
26970 int size;
26972 /* Load the function address into r11. Try to load the address using
26973 the shorter movl instead of movabs. We may want to support
26974 movq for kernel mode, but the kernel does not use trampolines at
26975 the moment. FNADDR is a 32-bit address and may not be in
26976 DImode when ptr_mode == SImode. Always use movl in this
26977 case. */
26978 if (ptr_mode == SImode
26979 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
26981 fnaddr = copy_addr_to_reg (fnaddr);
26983 mem = adjust_address (m_tramp, HImode, offset);
26984 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
26986 mem = adjust_address (m_tramp, SImode, offset + 2);
26987 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
26988 offset += 6;
26990 else
26992 mem = adjust_address (m_tramp, HImode, offset);
26993 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
26995 mem = adjust_address (m_tramp, DImode, offset + 2);
26996 emit_move_insn (mem, fnaddr);
26997 offset += 10;
27000 /* Load the static chain into r10 using movabs. Use the shorter movl
27001 instead of movabs when ptr_mode == SImode. */
27002 if (ptr_mode == SImode)
27004 opcode = 0xba41;
27005 size = 6;
27007 else
27009 opcode = 0xba49;
27010 size = 10;
27013 mem = adjust_address (m_tramp, HImode, offset);
27014 emit_move_insn (mem, gen_int_mode (opcode, HImode));
27016 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
27017 emit_move_insn (mem, chain_value);
27018 offset += size;
27020 /* Jump to r11; the last (unused) byte is a nop, only there to
27021 pad the write out to a single 32-bit store. */
27022 mem = adjust_address (m_tramp, SImode, offset);
27023 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
27024 offset += 4;
27026 else
27028 rtx disp, chain;
27030 /* Depending on the static chain location, either load a register
27031 with a constant, or push the constant to the stack. All of the
27032 instructions are the same size. */
27033 chain = ix86_static_chain (fndecl, true);
27034 if (REG_P (chain))
27036 switch (REGNO (chain))
27038 case AX_REG:
27039 opcode = 0xb8; break;
27040 case CX_REG:
27041 opcode = 0xb9; break;
27042 default:
27043 gcc_unreachable ();
27046 else
27047 opcode = 0x68;
27049 mem = adjust_address (m_tramp, QImode, offset);
27050 emit_move_insn (mem, gen_int_mode (opcode, QImode));
27052 mem = adjust_address (m_tramp, SImode, offset + 1);
27053 emit_move_insn (mem, chain_value);
27054 offset += 5;
27056 mem = adjust_address (m_tramp, QImode, offset);
27057 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
27059 mem = adjust_address (m_tramp, SImode, offset + 1);
27061 /* Compute the offset from the end of the jmp to the target function.
27062 When the trampoline stores the static chain on the stack, we need
27063 to skip the first insn, which pushes the (call-saved) register
27064 static chain; this push is 1 byte. */
27065 offset += 5;
27066 disp = expand_binop (SImode, sub_optab, fnaddr,
27067 plus_constant (Pmode, XEXP (m_tramp, 0),
27068 offset - (MEM_P (chain) ? 1 : 0)),
27069 NULL_RTX, 1, OPTAB_DIRECT);
27070 emit_move_insn (mem, disp);
27073 gcc_assert (offset <= TRAMPOLINE_SIZE);
27075 #ifdef HAVE_ENABLE_EXECUTE_STACK
27076 #ifdef CHECK_EXECUTE_STACK_ENABLED
27077 if (CHECK_EXECUTE_STACK_ENABLED)
27078 #endif
27079 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
27080 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
27081 #endif
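/* For reference, the 64-bit trampoline laid out above is, in the
   movabs form:
     49 bb <imm64>    movabs $fnaddr, %r11
     49 ba <imm64>    movabs $chain,  %r10
     49 ff e3         jmp    *%r11
     90               nop  (pads the jmp write to a full 32-bit store)
   with the 6-byte "41 bb/41 ba <imm32>" movl encodings substituted
   when the respective constant fits in 32 bits.  */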
27084 /* The following file contains several enumerations and data structures
27085 built from the definitions in i386-builtin-types.def. */
27087 #include "i386-builtin-types.inc"
27089 /* Table for the ix86 builtin non-function types. */
27090 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
27092 /* Retrieve an element from the above table, building some of
27093 the types lazily. */
27095 static tree
27096 ix86_get_builtin_type (enum ix86_builtin_type tcode)
27098 unsigned int index;
27099 tree type, itype;
27101 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
27103 type = ix86_builtin_type_tab[(int) tcode];
27104 if (type != NULL)
27105 return type;
27107 gcc_assert (tcode > IX86_BT_LAST_PRIM);
27108 if (tcode <= IX86_BT_LAST_VECT)
27110 enum machine_mode mode;
27112 index = tcode - IX86_BT_LAST_PRIM - 1;
27113 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
27114 mode = ix86_builtin_type_vect_mode[index];
27116 type = build_vector_type_for_mode (itype, mode);
27118 else
27120 int quals;
27122 index = tcode - IX86_BT_LAST_VECT - 1;
27123 if (tcode <= IX86_BT_LAST_PTR)
27124 quals = TYPE_UNQUALIFIED;
27125 else
27126 quals = TYPE_QUAL_CONST;
27128 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
27129 if (quals != TYPE_UNQUALIFIED)
27130 itype = build_qualified_type (itype, quals);
27132 type = build_pointer_type (itype);
27135 ix86_builtin_type_tab[(int) tcode] = type;
27136 return type;
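/* The tcode space is laid out in ranges: values up to IX86_BT_LAST_PRIM
   are primitive types filled in elsewhere, entries up to IX86_BT_LAST_VECT
   are vector types built from a base type and machine mode, entries up to
   IX86_BT_LAST_PTR are plain pointer types, and the remainder up to
   IX86_BT_LAST_CPTR are pointers to const-qualified types, matching the
   index arithmetic above.  */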
27139 /* Table for the ix86 builtin function types. */
27140 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
27142 /* Retrieve an element from the above table, building some of
27143 the types lazily. */
27145 static tree
27146 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
27148 tree type;
27150 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
27152 type = ix86_builtin_func_type_tab[(int) tcode];
27153 if (type != NULL)
27154 return type;
27156 if (tcode <= IX86_BT_LAST_FUNC)
27158 unsigned start = ix86_builtin_func_start[(int) tcode];
27159 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
27160 tree rtype, atype, args = void_list_node;
27161 unsigned i;
27163 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
27164 for (i = after - 1; i > start; --i)
27166 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
27167 args = tree_cons (NULL, atype, args);
27170 type = build_function_type (rtype, args);
27172 else
27174 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
27175 enum ix86_builtin_func_type icode;
27177 icode = ix86_builtin_func_alias_base[index];
27178 type = ix86_get_builtin_func_type (icode);
27181 ix86_builtin_func_type_tab[(int) tcode] = type;
27182 return type;
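/* Note that ix86_builtin_func_args[start] holds the return type and the
   remaining slots hold the arguments; the loop above walks them backwards
   so that tree_cons builds the TREE_LIST in declaration order.  Alias
   entries past IX86_BT_LAST_FUNC simply reuse the type of the function
   kind they alias.  */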
27186 /* Codes for all the SSE/MMX builtins. */
27187 enum ix86_builtins
27189 IX86_BUILTIN_ADDPS,
27190 IX86_BUILTIN_ADDSS,
27191 IX86_BUILTIN_DIVPS,
27192 IX86_BUILTIN_DIVSS,
27193 IX86_BUILTIN_MULPS,
27194 IX86_BUILTIN_MULSS,
27195 IX86_BUILTIN_SUBPS,
27196 IX86_BUILTIN_SUBSS,
27198 IX86_BUILTIN_CMPEQPS,
27199 IX86_BUILTIN_CMPLTPS,
27200 IX86_BUILTIN_CMPLEPS,
27201 IX86_BUILTIN_CMPGTPS,
27202 IX86_BUILTIN_CMPGEPS,
27203 IX86_BUILTIN_CMPNEQPS,
27204 IX86_BUILTIN_CMPNLTPS,
27205 IX86_BUILTIN_CMPNLEPS,
27206 IX86_BUILTIN_CMPNGTPS,
27207 IX86_BUILTIN_CMPNGEPS,
27208 IX86_BUILTIN_CMPORDPS,
27209 IX86_BUILTIN_CMPUNORDPS,
27210 IX86_BUILTIN_CMPEQSS,
27211 IX86_BUILTIN_CMPLTSS,
27212 IX86_BUILTIN_CMPLESS,
27213 IX86_BUILTIN_CMPNEQSS,
27214 IX86_BUILTIN_CMPNLTSS,
27215 IX86_BUILTIN_CMPNLESS,
27216 IX86_BUILTIN_CMPORDSS,
27217 IX86_BUILTIN_CMPUNORDSS,
27219 IX86_BUILTIN_COMIEQSS,
27220 IX86_BUILTIN_COMILTSS,
27221 IX86_BUILTIN_COMILESS,
27222 IX86_BUILTIN_COMIGTSS,
27223 IX86_BUILTIN_COMIGESS,
27224 IX86_BUILTIN_COMINEQSS,
27225 IX86_BUILTIN_UCOMIEQSS,
27226 IX86_BUILTIN_UCOMILTSS,
27227 IX86_BUILTIN_UCOMILESS,
27228 IX86_BUILTIN_UCOMIGTSS,
27229 IX86_BUILTIN_UCOMIGESS,
27230 IX86_BUILTIN_UCOMINEQSS,
27232 IX86_BUILTIN_CVTPI2PS,
27233 IX86_BUILTIN_CVTPS2PI,
27234 IX86_BUILTIN_CVTSI2SS,
27235 IX86_BUILTIN_CVTSI642SS,
27236 IX86_BUILTIN_CVTSS2SI,
27237 IX86_BUILTIN_CVTSS2SI64,
27238 IX86_BUILTIN_CVTTPS2PI,
27239 IX86_BUILTIN_CVTTSS2SI,
27240 IX86_BUILTIN_CVTTSS2SI64,
27242 IX86_BUILTIN_MAXPS,
27243 IX86_BUILTIN_MAXSS,
27244 IX86_BUILTIN_MINPS,
27245 IX86_BUILTIN_MINSS,
27247 IX86_BUILTIN_LOADUPS,
27248 IX86_BUILTIN_STOREUPS,
27249 IX86_BUILTIN_MOVSS,
27251 IX86_BUILTIN_MOVHLPS,
27252 IX86_BUILTIN_MOVLHPS,
27253 IX86_BUILTIN_LOADHPS,
27254 IX86_BUILTIN_LOADLPS,
27255 IX86_BUILTIN_STOREHPS,
27256 IX86_BUILTIN_STORELPS,
27258 IX86_BUILTIN_MASKMOVQ,
27259 IX86_BUILTIN_MOVMSKPS,
27260 IX86_BUILTIN_PMOVMSKB,
27262 IX86_BUILTIN_MOVNTPS,
27263 IX86_BUILTIN_MOVNTQ,
27265 IX86_BUILTIN_LOADDQU,
27266 IX86_BUILTIN_STOREDQU,
27268 IX86_BUILTIN_PACKSSWB,
27269 IX86_BUILTIN_PACKSSDW,
27270 IX86_BUILTIN_PACKUSWB,
27272 IX86_BUILTIN_PADDB,
27273 IX86_BUILTIN_PADDW,
27274 IX86_BUILTIN_PADDD,
27275 IX86_BUILTIN_PADDQ,
27276 IX86_BUILTIN_PADDSB,
27277 IX86_BUILTIN_PADDSW,
27278 IX86_BUILTIN_PADDUSB,
27279 IX86_BUILTIN_PADDUSW,
27280 IX86_BUILTIN_PSUBB,
27281 IX86_BUILTIN_PSUBW,
27282 IX86_BUILTIN_PSUBD,
27283 IX86_BUILTIN_PSUBQ,
27284 IX86_BUILTIN_PSUBSB,
27285 IX86_BUILTIN_PSUBSW,
27286 IX86_BUILTIN_PSUBUSB,
27287 IX86_BUILTIN_PSUBUSW,
27289 IX86_BUILTIN_PAND,
27290 IX86_BUILTIN_PANDN,
27291 IX86_BUILTIN_POR,
27292 IX86_BUILTIN_PXOR,
27294 IX86_BUILTIN_PAVGB,
27295 IX86_BUILTIN_PAVGW,
27297 IX86_BUILTIN_PCMPEQB,
27298 IX86_BUILTIN_PCMPEQW,
27299 IX86_BUILTIN_PCMPEQD,
27300 IX86_BUILTIN_PCMPGTB,
27301 IX86_BUILTIN_PCMPGTW,
27302 IX86_BUILTIN_PCMPGTD,
27304 IX86_BUILTIN_PMADDWD,
27306 IX86_BUILTIN_PMAXSW,
27307 IX86_BUILTIN_PMAXUB,
27308 IX86_BUILTIN_PMINSW,
27309 IX86_BUILTIN_PMINUB,
27311 IX86_BUILTIN_PMULHUW,
27312 IX86_BUILTIN_PMULHW,
27313 IX86_BUILTIN_PMULLW,
27315 IX86_BUILTIN_PSADBW,
27316 IX86_BUILTIN_PSHUFW,
27318 IX86_BUILTIN_PSLLW,
27319 IX86_BUILTIN_PSLLD,
27320 IX86_BUILTIN_PSLLQ,
27321 IX86_BUILTIN_PSRAW,
27322 IX86_BUILTIN_PSRAD,
27323 IX86_BUILTIN_PSRLW,
27324 IX86_BUILTIN_PSRLD,
27325 IX86_BUILTIN_PSRLQ,
27326 IX86_BUILTIN_PSLLWI,
27327 IX86_BUILTIN_PSLLDI,
27328 IX86_BUILTIN_PSLLQI,
27329 IX86_BUILTIN_PSRAWI,
27330 IX86_BUILTIN_PSRADI,
27331 IX86_BUILTIN_PSRLWI,
27332 IX86_BUILTIN_PSRLDI,
27333 IX86_BUILTIN_PSRLQI,
27335 IX86_BUILTIN_PUNPCKHBW,
27336 IX86_BUILTIN_PUNPCKHWD,
27337 IX86_BUILTIN_PUNPCKHDQ,
27338 IX86_BUILTIN_PUNPCKLBW,
27339 IX86_BUILTIN_PUNPCKLWD,
27340 IX86_BUILTIN_PUNPCKLDQ,
27342 IX86_BUILTIN_SHUFPS,
27344 IX86_BUILTIN_RCPPS,
27345 IX86_BUILTIN_RCPSS,
27346 IX86_BUILTIN_RSQRTPS,
27347 IX86_BUILTIN_RSQRTPS_NR,
27348 IX86_BUILTIN_RSQRTSS,
27349 IX86_BUILTIN_RSQRTF,
27350 IX86_BUILTIN_SQRTPS,
27351 IX86_BUILTIN_SQRTPS_NR,
27352 IX86_BUILTIN_SQRTSS,
27354 IX86_BUILTIN_UNPCKHPS,
27355 IX86_BUILTIN_UNPCKLPS,
27357 IX86_BUILTIN_ANDPS,
27358 IX86_BUILTIN_ANDNPS,
27359 IX86_BUILTIN_ORPS,
27360 IX86_BUILTIN_XORPS,
27362 IX86_BUILTIN_EMMS,
27363 IX86_BUILTIN_LDMXCSR,
27364 IX86_BUILTIN_STMXCSR,
27365 IX86_BUILTIN_SFENCE,
27367 IX86_BUILTIN_FXSAVE,
27368 IX86_BUILTIN_FXRSTOR,
27369 IX86_BUILTIN_FXSAVE64,
27370 IX86_BUILTIN_FXRSTOR64,
27372 IX86_BUILTIN_XSAVE,
27373 IX86_BUILTIN_XRSTOR,
27374 IX86_BUILTIN_XSAVE64,
27375 IX86_BUILTIN_XRSTOR64,
27377 IX86_BUILTIN_XSAVEOPT,
27378 IX86_BUILTIN_XSAVEOPT64,
27380 IX86_BUILTIN_XSAVEC,
27381 IX86_BUILTIN_XSAVEC64,
27383 IX86_BUILTIN_XSAVES,
27384 IX86_BUILTIN_XRSTORS,
27385 IX86_BUILTIN_XSAVES64,
27386 IX86_BUILTIN_XRSTORS64,
27388 /* 3DNow! Original */
27389 IX86_BUILTIN_FEMMS,
27390 IX86_BUILTIN_PAVGUSB,
27391 IX86_BUILTIN_PF2ID,
27392 IX86_BUILTIN_PFACC,
27393 IX86_BUILTIN_PFADD,
27394 IX86_BUILTIN_PFCMPEQ,
27395 IX86_BUILTIN_PFCMPGE,
27396 IX86_BUILTIN_PFCMPGT,
27397 IX86_BUILTIN_PFMAX,
27398 IX86_BUILTIN_PFMIN,
27399 IX86_BUILTIN_PFMUL,
27400 IX86_BUILTIN_PFRCP,
27401 IX86_BUILTIN_PFRCPIT1,
27402 IX86_BUILTIN_PFRCPIT2,
27403 IX86_BUILTIN_PFRSQIT1,
27404 IX86_BUILTIN_PFRSQRT,
27405 IX86_BUILTIN_PFSUB,
27406 IX86_BUILTIN_PFSUBR,
27407 IX86_BUILTIN_PI2FD,
27408 IX86_BUILTIN_PMULHRW,
27410 /* 3DNow! Athlon Extensions */
27411 IX86_BUILTIN_PF2IW,
27412 IX86_BUILTIN_PFNACC,
27413 IX86_BUILTIN_PFPNACC,
27414 IX86_BUILTIN_PI2FW,
27415 IX86_BUILTIN_PSWAPDSI,
27416 IX86_BUILTIN_PSWAPDSF,
27418 /* SSE2 */
27419 IX86_BUILTIN_ADDPD,
27420 IX86_BUILTIN_ADDSD,
27421 IX86_BUILTIN_DIVPD,
27422 IX86_BUILTIN_DIVSD,
27423 IX86_BUILTIN_MULPD,
27424 IX86_BUILTIN_MULSD,
27425 IX86_BUILTIN_SUBPD,
27426 IX86_BUILTIN_SUBSD,
27428 IX86_BUILTIN_CMPEQPD,
27429 IX86_BUILTIN_CMPLTPD,
27430 IX86_BUILTIN_CMPLEPD,
27431 IX86_BUILTIN_CMPGTPD,
27432 IX86_BUILTIN_CMPGEPD,
27433 IX86_BUILTIN_CMPNEQPD,
27434 IX86_BUILTIN_CMPNLTPD,
27435 IX86_BUILTIN_CMPNLEPD,
27436 IX86_BUILTIN_CMPNGTPD,
27437 IX86_BUILTIN_CMPNGEPD,
27438 IX86_BUILTIN_CMPORDPD,
27439 IX86_BUILTIN_CMPUNORDPD,
27440 IX86_BUILTIN_CMPEQSD,
27441 IX86_BUILTIN_CMPLTSD,
27442 IX86_BUILTIN_CMPLESD,
27443 IX86_BUILTIN_CMPNEQSD,
27444 IX86_BUILTIN_CMPNLTSD,
27445 IX86_BUILTIN_CMPNLESD,
27446 IX86_BUILTIN_CMPORDSD,
27447 IX86_BUILTIN_CMPUNORDSD,
27449 IX86_BUILTIN_COMIEQSD,
27450 IX86_BUILTIN_COMILTSD,
27451 IX86_BUILTIN_COMILESD,
27452 IX86_BUILTIN_COMIGTSD,
27453 IX86_BUILTIN_COMIGESD,
27454 IX86_BUILTIN_COMINEQSD,
27455 IX86_BUILTIN_UCOMIEQSD,
27456 IX86_BUILTIN_UCOMILTSD,
27457 IX86_BUILTIN_UCOMILESD,
27458 IX86_BUILTIN_UCOMIGTSD,
27459 IX86_BUILTIN_UCOMIGESD,
27460 IX86_BUILTIN_UCOMINEQSD,
27462 IX86_BUILTIN_MAXPD,
27463 IX86_BUILTIN_MAXSD,
27464 IX86_BUILTIN_MINPD,
27465 IX86_BUILTIN_MINSD,
27467 IX86_BUILTIN_ANDPD,
27468 IX86_BUILTIN_ANDNPD,
27469 IX86_BUILTIN_ORPD,
27470 IX86_BUILTIN_XORPD,
27472 IX86_BUILTIN_SQRTPD,
27473 IX86_BUILTIN_SQRTSD,
27475 IX86_BUILTIN_UNPCKHPD,
27476 IX86_BUILTIN_UNPCKLPD,
27478 IX86_BUILTIN_SHUFPD,
27480 IX86_BUILTIN_LOADUPD,
27481 IX86_BUILTIN_STOREUPD,
27482 IX86_BUILTIN_MOVSD,
27484 IX86_BUILTIN_LOADHPD,
27485 IX86_BUILTIN_LOADLPD,
27487 IX86_BUILTIN_CVTDQ2PD,
27488 IX86_BUILTIN_CVTDQ2PS,
27490 IX86_BUILTIN_CVTPD2DQ,
27491 IX86_BUILTIN_CVTPD2PI,
27492 IX86_BUILTIN_CVTPD2PS,
27493 IX86_BUILTIN_CVTTPD2DQ,
27494 IX86_BUILTIN_CVTTPD2PI,
27496 IX86_BUILTIN_CVTPI2PD,
27497 IX86_BUILTIN_CVTSI2SD,
27498 IX86_BUILTIN_CVTSI642SD,
27500 IX86_BUILTIN_CVTSD2SI,
27501 IX86_BUILTIN_CVTSD2SI64,
27502 IX86_BUILTIN_CVTSD2SS,
27503 IX86_BUILTIN_CVTSS2SD,
27504 IX86_BUILTIN_CVTTSD2SI,
27505 IX86_BUILTIN_CVTTSD2SI64,
27507 IX86_BUILTIN_CVTPS2DQ,
27508 IX86_BUILTIN_CVTPS2PD,
27509 IX86_BUILTIN_CVTTPS2DQ,
27511 IX86_BUILTIN_MOVNTI,
27512 IX86_BUILTIN_MOVNTI64,
27513 IX86_BUILTIN_MOVNTPD,
27514 IX86_BUILTIN_MOVNTDQ,
27516 IX86_BUILTIN_MOVQ128,
27518 /* SSE2 MMX */
27519 IX86_BUILTIN_MASKMOVDQU,
27520 IX86_BUILTIN_MOVMSKPD,
27521 IX86_BUILTIN_PMOVMSKB128,
27523 IX86_BUILTIN_PACKSSWB128,
27524 IX86_BUILTIN_PACKSSDW128,
27525 IX86_BUILTIN_PACKUSWB128,
27527 IX86_BUILTIN_PADDB128,
27528 IX86_BUILTIN_PADDW128,
27529 IX86_BUILTIN_PADDD128,
27530 IX86_BUILTIN_PADDQ128,
27531 IX86_BUILTIN_PADDSB128,
27532 IX86_BUILTIN_PADDSW128,
27533 IX86_BUILTIN_PADDUSB128,
27534 IX86_BUILTIN_PADDUSW128,
27535 IX86_BUILTIN_PSUBB128,
27536 IX86_BUILTIN_PSUBW128,
27537 IX86_BUILTIN_PSUBD128,
27538 IX86_BUILTIN_PSUBQ128,
27539 IX86_BUILTIN_PSUBSB128,
27540 IX86_BUILTIN_PSUBSW128,
27541 IX86_BUILTIN_PSUBUSB128,
27542 IX86_BUILTIN_PSUBUSW128,
27544 IX86_BUILTIN_PAND128,
27545 IX86_BUILTIN_PANDN128,
27546 IX86_BUILTIN_POR128,
27547 IX86_BUILTIN_PXOR128,
27549 IX86_BUILTIN_PAVGB128,
27550 IX86_BUILTIN_PAVGW128,
27552 IX86_BUILTIN_PCMPEQB128,
27553 IX86_BUILTIN_PCMPEQW128,
27554 IX86_BUILTIN_PCMPEQD128,
27555 IX86_BUILTIN_PCMPGTB128,
27556 IX86_BUILTIN_PCMPGTW128,
27557 IX86_BUILTIN_PCMPGTD128,
27559 IX86_BUILTIN_PMADDWD128,
27561 IX86_BUILTIN_PMAXSW128,
27562 IX86_BUILTIN_PMAXUB128,
27563 IX86_BUILTIN_PMINSW128,
27564 IX86_BUILTIN_PMINUB128,
27566 IX86_BUILTIN_PMULUDQ,
27567 IX86_BUILTIN_PMULUDQ128,
27568 IX86_BUILTIN_PMULHUW128,
27569 IX86_BUILTIN_PMULHW128,
27570 IX86_BUILTIN_PMULLW128,
27572 IX86_BUILTIN_PSADBW128,
27573 IX86_BUILTIN_PSHUFHW,
27574 IX86_BUILTIN_PSHUFLW,
27575 IX86_BUILTIN_PSHUFD,
27577 IX86_BUILTIN_PSLLDQI128,
27578 IX86_BUILTIN_PSLLWI128,
27579 IX86_BUILTIN_PSLLDI128,
27580 IX86_BUILTIN_PSLLQI128,
27581 IX86_BUILTIN_PSRAWI128,
27582 IX86_BUILTIN_PSRADI128,
27583 IX86_BUILTIN_PSRLDQI128,
27584 IX86_BUILTIN_PSRLWI128,
27585 IX86_BUILTIN_PSRLDI128,
27586 IX86_BUILTIN_PSRLQI128,
27588 IX86_BUILTIN_PSLLDQ128,
27589 IX86_BUILTIN_PSLLW128,
27590 IX86_BUILTIN_PSLLD128,
27591 IX86_BUILTIN_PSLLQ128,
27592 IX86_BUILTIN_PSRAW128,
27593 IX86_BUILTIN_PSRAD128,
27594 IX86_BUILTIN_PSRLW128,
27595 IX86_BUILTIN_PSRLD128,
27596 IX86_BUILTIN_PSRLQ128,
27598 IX86_BUILTIN_PUNPCKHBW128,
27599 IX86_BUILTIN_PUNPCKHWD128,
27600 IX86_BUILTIN_PUNPCKHDQ128,
27601 IX86_BUILTIN_PUNPCKHQDQ128,
27602 IX86_BUILTIN_PUNPCKLBW128,
27603 IX86_BUILTIN_PUNPCKLWD128,
27604 IX86_BUILTIN_PUNPCKLDQ128,
27605 IX86_BUILTIN_PUNPCKLQDQ128,
27607 IX86_BUILTIN_CLFLUSH,
27608 IX86_BUILTIN_MFENCE,
27609 IX86_BUILTIN_LFENCE,
27610 IX86_BUILTIN_PAUSE,
27612 IX86_BUILTIN_FNSTENV,
27613 IX86_BUILTIN_FLDENV,
27614 IX86_BUILTIN_FNSTSW,
27615 IX86_BUILTIN_FNCLEX,
27617 IX86_BUILTIN_BSRSI,
27618 IX86_BUILTIN_BSRDI,
27619 IX86_BUILTIN_RDPMC,
27620 IX86_BUILTIN_RDTSC,
27621 IX86_BUILTIN_RDTSCP,
27622 IX86_BUILTIN_ROLQI,
27623 IX86_BUILTIN_ROLHI,
27624 IX86_BUILTIN_RORQI,
27625 IX86_BUILTIN_RORHI,
27627 /* SSE3. */
27628 IX86_BUILTIN_ADDSUBPS,
27629 IX86_BUILTIN_HADDPS,
27630 IX86_BUILTIN_HSUBPS,
27631 IX86_BUILTIN_MOVSHDUP,
27632 IX86_BUILTIN_MOVSLDUP,
27633 IX86_BUILTIN_ADDSUBPD,
27634 IX86_BUILTIN_HADDPD,
27635 IX86_BUILTIN_HSUBPD,
27636 IX86_BUILTIN_LDDQU,
27638 IX86_BUILTIN_MONITOR,
27639 IX86_BUILTIN_MWAIT,
27641 /* SSSE3. */
27642 IX86_BUILTIN_PHADDW,
27643 IX86_BUILTIN_PHADDD,
27644 IX86_BUILTIN_PHADDSW,
27645 IX86_BUILTIN_PHSUBW,
27646 IX86_BUILTIN_PHSUBD,
27647 IX86_BUILTIN_PHSUBSW,
27648 IX86_BUILTIN_PMADDUBSW,
27649 IX86_BUILTIN_PMULHRSW,
27650 IX86_BUILTIN_PSHUFB,
27651 IX86_BUILTIN_PSIGNB,
27652 IX86_BUILTIN_PSIGNW,
27653 IX86_BUILTIN_PSIGND,
27654 IX86_BUILTIN_PALIGNR,
27655 IX86_BUILTIN_PABSB,
27656 IX86_BUILTIN_PABSW,
27657 IX86_BUILTIN_PABSD,
27659 IX86_BUILTIN_PHADDW128,
27660 IX86_BUILTIN_PHADDD128,
27661 IX86_BUILTIN_PHADDSW128,
27662 IX86_BUILTIN_PHSUBW128,
27663 IX86_BUILTIN_PHSUBD128,
27664 IX86_BUILTIN_PHSUBSW128,
27665 IX86_BUILTIN_PMADDUBSW128,
27666 IX86_BUILTIN_PMULHRSW128,
27667 IX86_BUILTIN_PSHUFB128,
27668 IX86_BUILTIN_PSIGNB128,
27669 IX86_BUILTIN_PSIGNW128,
27670 IX86_BUILTIN_PSIGND128,
27671 IX86_BUILTIN_PALIGNR128,
27672 IX86_BUILTIN_PABSB128,
27673 IX86_BUILTIN_PABSW128,
27674 IX86_BUILTIN_PABSD128,
27676 /* AMDFAM10 - SSE4A New Instructions. */
27677 IX86_BUILTIN_MOVNTSD,
27678 IX86_BUILTIN_MOVNTSS,
27679 IX86_BUILTIN_EXTRQI,
27680 IX86_BUILTIN_EXTRQ,
27681 IX86_BUILTIN_INSERTQI,
27682 IX86_BUILTIN_INSERTQ,
27684 /* SSE4.1. */
27685 IX86_BUILTIN_BLENDPD,
27686 IX86_BUILTIN_BLENDPS,
27687 IX86_BUILTIN_BLENDVPD,
27688 IX86_BUILTIN_BLENDVPS,
27689 IX86_BUILTIN_PBLENDVB128,
27690 IX86_BUILTIN_PBLENDW128,
27692 IX86_BUILTIN_DPPD,
27693 IX86_BUILTIN_DPPS,
27695 IX86_BUILTIN_INSERTPS128,
27697 IX86_BUILTIN_MOVNTDQA,
27698 IX86_BUILTIN_MPSADBW128,
27699 IX86_BUILTIN_PACKUSDW128,
27700 IX86_BUILTIN_PCMPEQQ,
27701 IX86_BUILTIN_PHMINPOSUW128,
27703 IX86_BUILTIN_PMAXSB128,
27704 IX86_BUILTIN_PMAXSD128,
27705 IX86_BUILTIN_PMAXUD128,
27706 IX86_BUILTIN_PMAXUW128,
27708 IX86_BUILTIN_PMINSB128,
27709 IX86_BUILTIN_PMINSD128,
27710 IX86_BUILTIN_PMINUD128,
27711 IX86_BUILTIN_PMINUW128,
27713 IX86_BUILTIN_PMOVSXBW128,
27714 IX86_BUILTIN_PMOVSXBD128,
27715 IX86_BUILTIN_PMOVSXBQ128,
27716 IX86_BUILTIN_PMOVSXWD128,
27717 IX86_BUILTIN_PMOVSXWQ128,
27718 IX86_BUILTIN_PMOVSXDQ128,
27720 IX86_BUILTIN_PMOVZXBW128,
27721 IX86_BUILTIN_PMOVZXBD128,
27722 IX86_BUILTIN_PMOVZXBQ128,
27723 IX86_BUILTIN_PMOVZXWD128,
27724 IX86_BUILTIN_PMOVZXWQ128,
27725 IX86_BUILTIN_PMOVZXDQ128,
27727 IX86_BUILTIN_PMULDQ128,
27728 IX86_BUILTIN_PMULLD128,
27730 IX86_BUILTIN_ROUNDSD,
27731 IX86_BUILTIN_ROUNDSS,
27733 IX86_BUILTIN_ROUNDPD,
27734 IX86_BUILTIN_ROUNDPS,
27736 IX86_BUILTIN_FLOORPD,
27737 IX86_BUILTIN_CEILPD,
27738 IX86_BUILTIN_TRUNCPD,
27739 IX86_BUILTIN_RINTPD,
27740 IX86_BUILTIN_ROUNDPD_AZ,
27742 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
27743 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
27744 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,
27746 IX86_BUILTIN_FLOORPS,
27747 IX86_BUILTIN_CEILPS,
27748 IX86_BUILTIN_TRUNCPS,
27749 IX86_BUILTIN_RINTPS,
27750 IX86_BUILTIN_ROUNDPS_AZ,
27752 IX86_BUILTIN_FLOORPS_SFIX,
27753 IX86_BUILTIN_CEILPS_SFIX,
27754 IX86_BUILTIN_ROUNDPS_AZ_SFIX,
27756 IX86_BUILTIN_PTESTZ,
27757 IX86_BUILTIN_PTESTC,
27758 IX86_BUILTIN_PTESTNZC,
27760 IX86_BUILTIN_VEC_INIT_V2SI,
27761 IX86_BUILTIN_VEC_INIT_V4HI,
27762 IX86_BUILTIN_VEC_INIT_V8QI,
27763 IX86_BUILTIN_VEC_EXT_V2DF,
27764 IX86_BUILTIN_VEC_EXT_V2DI,
27765 IX86_BUILTIN_VEC_EXT_V4SF,
27766 IX86_BUILTIN_VEC_EXT_V4SI,
27767 IX86_BUILTIN_VEC_EXT_V8HI,
27768 IX86_BUILTIN_VEC_EXT_V2SI,
27769 IX86_BUILTIN_VEC_EXT_V4HI,
27770 IX86_BUILTIN_VEC_EXT_V16QI,
27771 IX86_BUILTIN_VEC_SET_V2DI,
27772 IX86_BUILTIN_VEC_SET_V4SF,
27773 IX86_BUILTIN_VEC_SET_V4SI,
27774 IX86_BUILTIN_VEC_SET_V8HI,
27775 IX86_BUILTIN_VEC_SET_V4HI,
27776 IX86_BUILTIN_VEC_SET_V16QI,
27778 IX86_BUILTIN_VEC_PACK_SFIX,
27779 IX86_BUILTIN_VEC_PACK_SFIX256,
27781 /* SSE4.2. */
27782 IX86_BUILTIN_CRC32QI,
27783 IX86_BUILTIN_CRC32HI,
27784 IX86_BUILTIN_CRC32SI,
27785 IX86_BUILTIN_CRC32DI,
27787 IX86_BUILTIN_PCMPESTRI128,
27788 IX86_BUILTIN_PCMPESTRM128,
27789 IX86_BUILTIN_PCMPESTRA128,
27790 IX86_BUILTIN_PCMPESTRC128,
27791 IX86_BUILTIN_PCMPESTRO128,
27792 IX86_BUILTIN_PCMPESTRS128,
27793 IX86_BUILTIN_PCMPESTRZ128,
27794 IX86_BUILTIN_PCMPISTRI128,
27795 IX86_BUILTIN_PCMPISTRM128,
27796 IX86_BUILTIN_PCMPISTRA128,
27797 IX86_BUILTIN_PCMPISTRC128,
27798 IX86_BUILTIN_PCMPISTRO128,
27799 IX86_BUILTIN_PCMPISTRS128,
27800 IX86_BUILTIN_PCMPISTRZ128,
27802 IX86_BUILTIN_PCMPGTQ,
27804 /* AES instructions */
27805 IX86_BUILTIN_AESENC128,
27806 IX86_BUILTIN_AESENCLAST128,
27807 IX86_BUILTIN_AESDEC128,
27808 IX86_BUILTIN_AESDECLAST128,
27809 IX86_BUILTIN_AESIMC128,
27810 IX86_BUILTIN_AESKEYGENASSIST128,
27812 /* PCLMUL instruction */
27813 IX86_BUILTIN_PCLMULQDQ128,
27815 /* AVX */
27816 IX86_BUILTIN_ADDPD256,
27817 IX86_BUILTIN_ADDPS256,
27818 IX86_BUILTIN_ADDSUBPD256,
27819 IX86_BUILTIN_ADDSUBPS256,
27820 IX86_BUILTIN_ANDPD256,
27821 IX86_BUILTIN_ANDPS256,
27822 IX86_BUILTIN_ANDNPD256,
27823 IX86_BUILTIN_ANDNPS256,
27824 IX86_BUILTIN_BLENDPD256,
27825 IX86_BUILTIN_BLENDPS256,
27826 IX86_BUILTIN_BLENDVPD256,
27827 IX86_BUILTIN_BLENDVPS256,
27828 IX86_BUILTIN_DIVPD256,
27829 IX86_BUILTIN_DIVPS256,
27830 IX86_BUILTIN_DPPS256,
27831 IX86_BUILTIN_HADDPD256,
27832 IX86_BUILTIN_HADDPS256,
27833 IX86_BUILTIN_HSUBPD256,
27834 IX86_BUILTIN_HSUBPS256,
27835 IX86_BUILTIN_MAXPD256,
27836 IX86_BUILTIN_MAXPS256,
27837 IX86_BUILTIN_MINPD256,
27838 IX86_BUILTIN_MINPS256,
27839 IX86_BUILTIN_MULPD256,
27840 IX86_BUILTIN_MULPS256,
27841 IX86_BUILTIN_ORPD256,
27842 IX86_BUILTIN_ORPS256,
27843 IX86_BUILTIN_SHUFPD256,
27844 IX86_BUILTIN_SHUFPS256,
27845 IX86_BUILTIN_SUBPD256,
27846 IX86_BUILTIN_SUBPS256,
27847 IX86_BUILTIN_XORPD256,
27848 IX86_BUILTIN_XORPS256,
27849 IX86_BUILTIN_CMPSD,
27850 IX86_BUILTIN_CMPSS,
27851 IX86_BUILTIN_CMPPD,
27852 IX86_BUILTIN_CMPPS,
27853 IX86_BUILTIN_CMPPD256,
27854 IX86_BUILTIN_CMPPS256,
27855 IX86_BUILTIN_CVTDQ2PD256,
27856 IX86_BUILTIN_CVTDQ2PS256,
27857 IX86_BUILTIN_CVTPD2PS256,
27858 IX86_BUILTIN_CVTPS2DQ256,
27859 IX86_BUILTIN_CVTPS2PD256,
27860 IX86_BUILTIN_CVTTPD2DQ256,
27861 IX86_BUILTIN_CVTPD2DQ256,
27862 IX86_BUILTIN_CVTTPS2DQ256,
27863 IX86_BUILTIN_EXTRACTF128PD256,
27864 IX86_BUILTIN_EXTRACTF128PS256,
27865 IX86_BUILTIN_EXTRACTF128SI256,
27866 IX86_BUILTIN_VZEROALL,
27867 IX86_BUILTIN_VZEROUPPER,
27868 IX86_BUILTIN_VPERMILVARPD,
27869 IX86_BUILTIN_VPERMILVARPS,
27870 IX86_BUILTIN_VPERMILVARPD256,
27871 IX86_BUILTIN_VPERMILVARPS256,
27872 IX86_BUILTIN_VPERMILPD,
27873 IX86_BUILTIN_VPERMILPS,
27874 IX86_BUILTIN_VPERMILPD256,
27875 IX86_BUILTIN_VPERMILPS256,
27876 IX86_BUILTIN_VPERMIL2PD,
27877 IX86_BUILTIN_VPERMIL2PS,
27878 IX86_BUILTIN_VPERMIL2PD256,
27879 IX86_BUILTIN_VPERMIL2PS256,
27880 IX86_BUILTIN_VPERM2F128PD256,
27881 IX86_BUILTIN_VPERM2F128PS256,
27882 IX86_BUILTIN_VPERM2F128SI256,
27883 IX86_BUILTIN_VBROADCASTSS,
27884 IX86_BUILTIN_VBROADCASTSD256,
27885 IX86_BUILTIN_VBROADCASTSS256,
27886 IX86_BUILTIN_VBROADCASTPD256,
27887 IX86_BUILTIN_VBROADCASTPS256,
27888 IX86_BUILTIN_VINSERTF128PD256,
27889 IX86_BUILTIN_VINSERTF128PS256,
27890 IX86_BUILTIN_VINSERTF128SI256,
27891 IX86_BUILTIN_LOADUPD256,
27892 IX86_BUILTIN_LOADUPS256,
27893 IX86_BUILTIN_STOREUPD256,
27894 IX86_BUILTIN_STOREUPS256,
27895 IX86_BUILTIN_LDDQU256,
27896 IX86_BUILTIN_MOVNTDQ256,
27897 IX86_BUILTIN_MOVNTPD256,
27898 IX86_BUILTIN_MOVNTPS256,
27899 IX86_BUILTIN_LOADDQU256,
27900 IX86_BUILTIN_STOREDQU256,
27901 IX86_BUILTIN_MASKLOADPD,
27902 IX86_BUILTIN_MASKLOADPS,
27903 IX86_BUILTIN_MASKSTOREPD,
27904 IX86_BUILTIN_MASKSTOREPS,
27905 IX86_BUILTIN_MASKLOADPD256,
27906 IX86_BUILTIN_MASKLOADPS256,
27907 IX86_BUILTIN_MASKSTOREPD256,
27908 IX86_BUILTIN_MASKSTOREPS256,
27909 IX86_BUILTIN_MOVSHDUP256,
27910 IX86_BUILTIN_MOVSLDUP256,
27911 IX86_BUILTIN_MOVDDUP256,
27913 IX86_BUILTIN_SQRTPD256,
27914 IX86_BUILTIN_SQRTPS256,
27915 IX86_BUILTIN_SQRTPS_NR256,
27916 IX86_BUILTIN_RSQRTPS256,
27917 IX86_BUILTIN_RSQRTPS_NR256,
27919 IX86_BUILTIN_RCPPS256,
27921 IX86_BUILTIN_ROUNDPD256,
27922 IX86_BUILTIN_ROUNDPS256,
27924 IX86_BUILTIN_FLOORPD256,
27925 IX86_BUILTIN_CEILPD256,
27926 IX86_BUILTIN_TRUNCPD256,
27927 IX86_BUILTIN_RINTPD256,
27928 IX86_BUILTIN_ROUNDPD_AZ256,
27930 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
27931 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
27932 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,
27934 IX86_BUILTIN_FLOORPS256,
27935 IX86_BUILTIN_CEILPS256,
27936 IX86_BUILTIN_TRUNCPS256,
27937 IX86_BUILTIN_RINTPS256,
27938 IX86_BUILTIN_ROUNDPS_AZ256,
27940 IX86_BUILTIN_FLOORPS_SFIX256,
27941 IX86_BUILTIN_CEILPS_SFIX256,
27942 IX86_BUILTIN_ROUNDPS_AZ_SFIX256,
27944 IX86_BUILTIN_UNPCKHPD256,
27945 IX86_BUILTIN_UNPCKLPD256,
27946 IX86_BUILTIN_UNPCKHPS256,
27947 IX86_BUILTIN_UNPCKLPS256,
27949 IX86_BUILTIN_SI256_SI,
27950 IX86_BUILTIN_PS256_PS,
27951 IX86_BUILTIN_PD256_PD,
27952 IX86_BUILTIN_SI_SI256,
27953 IX86_BUILTIN_PS_PS256,
27954 IX86_BUILTIN_PD_PD256,
27956 IX86_BUILTIN_VTESTZPD,
27957 IX86_BUILTIN_VTESTCPD,
27958 IX86_BUILTIN_VTESTNZCPD,
27959 IX86_BUILTIN_VTESTZPS,
27960 IX86_BUILTIN_VTESTCPS,
27961 IX86_BUILTIN_VTESTNZCPS,
27962 IX86_BUILTIN_VTESTZPD256,
27963 IX86_BUILTIN_VTESTCPD256,
27964 IX86_BUILTIN_VTESTNZCPD256,
27965 IX86_BUILTIN_VTESTZPS256,
27966 IX86_BUILTIN_VTESTCPS256,
27967 IX86_BUILTIN_VTESTNZCPS256,
27968 IX86_BUILTIN_PTESTZ256,
27969 IX86_BUILTIN_PTESTC256,
27970 IX86_BUILTIN_PTESTNZC256,
27972 IX86_BUILTIN_MOVMSKPD256,
27973 IX86_BUILTIN_MOVMSKPS256,
27975 /* AVX2 */
27976 IX86_BUILTIN_MPSADBW256,
27977 IX86_BUILTIN_PABSB256,
27978 IX86_BUILTIN_PABSW256,
27979 IX86_BUILTIN_PABSD256,
27980 IX86_BUILTIN_PACKSSDW256,
27981 IX86_BUILTIN_PACKSSWB256,
27982 IX86_BUILTIN_PACKUSDW256,
27983 IX86_BUILTIN_PACKUSWB256,
27984 IX86_BUILTIN_PADDB256,
27985 IX86_BUILTIN_PADDW256,
27986 IX86_BUILTIN_PADDD256,
27987 IX86_BUILTIN_PADDQ256,
27988 IX86_BUILTIN_PADDSB256,
27989 IX86_BUILTIN_PADDSW256,
27990 IX86_BUILTIN_PADDUSB256,
27991 IX86_BUILTIN_PADDUSW256,
27992 IX86_BUILTIN_PALIGNR256,
27993 IX86_BUILTIN_AND256I,
27994 IX86_BUILTIN_ANDNOT256I,
27995 IX86_BUILTIN_PAVGB256,
27996 IX86_BUILTIN_PAVGW256,
27997 IX86_BUILTIN_PBLENDVB256,
27998 IX86_BUILTIN_PBLENDVW256,
27999 IX86_BUILTIN_PCMPEQB256,
28000 IX86_BUILTIN_PCMPEQW256,
28001 IX86_BUILTIN_PCMPEQD256,
28002 IX86_BUILTIN_PCMPEQQ256,
28003 IX86_BUILTIN_PCMPGTB256,
28004 IX86_BUILTIN_PCMPGTW256,
28005 IX86_BUILTIN_PCMPGTD256,
28006 IX86_BUILTIN_PCMPGTQ256,
28007 IX86_BUILTIN_PHADDW256,
28008 IX86_BUILTIN_PHADDD256,
28009 IX86_BUILTIN_PHADDSW256,
28010 IX86_BUILTIN_PHSUBW256,
28011 IX86_BUILTIN_PHSUBD256,
28012 IX86_BUILTIN_PHSUBSW256,
28013 IX86_BUILTIN_PMADDUBSW256,
28014 IX86_BUILTIN_PMADDWD256,
28015 IX86_BUILTIN_PMAXSB256,
28016 IX86_BUILTIN_PMAXSW256,
28017 IX86_BUILTIN_PMAXSD256,
28018 IX86_BUILTIN_PMAXUB256,
28019 IX86_BUILTIN_PMAXUW256,
28020 IX86_BUILTIN_PMAXUD256,
28021 IX86_BUILTIN_PMINSB256,
28022 IX86_BUILTIN_PMINSW256,
28023 IX86_BUILTIN_PMINSD256,
28024 IX86_BUILTIN_PMINUB256,
28025 IX86_BUILTIN_PMINUW256,
28026 IX86_BUILTIN_PMINUD256,
28027 IX86_BUILTIN_PMOVMSKB256,
28028 IX86_BUILTIN_PMOVSXBW256,
28029 IX86_BUILTIN_PMOVSXBD256,
28030 IX86_BUILTIN_PMOVSXBQ256,
28031 IX86_BUILTIN_PMOVSXWD256,
28032 IX86_BUILTIN_PMOVSXWQ256,
28033 IX86_BUILTIN_PMOVSXDQ256,
28034 IX86_BUILTIN_PMOVZXBW256,
28035 IX86_BUILTIN_PMOVZXBD256,
28036 IX86_BUILTIN_PMOVZXBQ256,
28037 IX86_BUILTIN_PMOVZXWD256,
28038 IX86_BUILTIN_PMOVZXWQ256,
28039 IX86_BUILTIN_PMOVZXDQ256,
28040 IX86_BUILTIN_PMULDQ256,
28041 IX86_BUILTIN_PMULHRSW256,
28042 IX86_BUILTIN_PMULHUW256,
28043 IX86_BUILTIN_PMULHW256,
28044 IX86_BUILTIN_PMULLW256,
28045 IX86_BUILTIN_PMULLD256,
28046 IX86_BUILTIN_PMULUDQ256,
28047 IX86_BUILTIN_POR256,
28048 IX86_BUILTIN_PSADBW256,
28049 IX86_BUILTIN_PSHUFB256,
28050 IX86_BUILTIN_PSHUFD256,
28051 IX86_BUILTIN_PSHUFHW256,
28052 IX86_BUILTIN_PSHUFLW256,
28053 IX86_BUILTIN_PSIGNB256,
28054 IX86_BUILTIN_PSIGNW256,
28055 IX86_BUILTIN_PSIGND256,
28056 IX86_BUILTIN_PSLLDQI256,
28057 IX86_BUILTIN_PSLLWI256,
28058 IX86_BUILTIN_PSLLW256,
28059 IX86_BUILTIN_PSLLDI256,
28060 IX86_BUILTIN_PSLLD256,
28061 IX86_BUILTIN_PSLLQI256,
28062 IX86_BUILTIN_PSLLQ256,
28063 IX86_BUILTIN_PSRAWI256,
28064 IX86_BUILTIN_PSRAW256,
28065 IX86_BUILTIN_PSRADI256,
28066 IX86_BUILTIN_PSRAD256,
28067 IX86_BUILTIN_PSRLDQI256,
28068 IX86_BUILTIN_PSRLWI256,
28069 IX86_BUILTIN_PSRLW256,
28070 IX86_BUILTIN_PSRLDI256,
28071 IX86_BUILTIN_PSRLD256,
28072 IX86_BUILTIN_PSRLQI256,
28073 IX86_BUILTIN_PSRLQ256,
28074 IX86_BUILTIN_PSUBB256,
28075 IX86_BUILTIN_PSUBW256,
28076 IX86_BUILTIN_PSUBD256,
28077 IX86_BUILTIN_PSUBQ256,
28078 IX86_BUILTIN_PSUBSB256,
28079 IX86_BUILTIN_PSUBSW256,
28080 IX86_BUILTIN_PSUBUSB256,
28081 IX86_BUILTIN_PSUBUSW256,
28082 IX86_BUILTIN_PUNPCKHBW256,
28083 IX86_BUILTIN_PUNPCKHWD256,
28084 IX86_BUILTIN_PUNPCKHDQ256,
28085 IX86_BUILTIN_PUNPCKHQDQ256,
28086 IX86_BUILTIN_PUNPCKLBW256,
28087 IX86_BUILTIN_PUNPCKLWD256,
28088 IX86_BUILTIN_PUNPCKLDQ256,
28089 IX86_BUILTIN_PUNPCKLQDQ256,
28090 IX86_BUILTIN_PXOR256,
28091 IX86_BUILTIN_MOVNTDQA256,
28092 IX86_BUILTIN_VBROADCASTSS_PS,
28093 IX86_BUILTIN_VBROADCASTSS_PS256,
28094 IX86_BUILTIN_VBROADCASTSD_PD256,
28095 IX86_BUILTIN_VBROADCASTSI256,
28096 IX86_BUILTIN_PBLENDD256,
28097 IX86_BUILTIN_PBLENDD128,
28098 IX86_BUILTIN_PBROADCASTB256,
28099 IX86_BUILTIN_PBROADCASTW256,
28100 IX86_BUILTIN_PBROADCASTD256,
28101 IX86_BUILTIN_PBROADCASTQ256,
28102 IX86_BUILTIN_PBROADCASTB128,
28103 IX86_BUILTIN_PBROADCASTW128,
28104 IX86_BUILTIN_PBROADCASTD128,
28105 IX86_BUILTIN_PBROADCASTQ128,
28106 IX86_BUILTIN_VPERMVARSI256,
28107 IX86_BUILTIN_VPERMDF256,
28108 IX86_BUILTIN_VPERMVARSF256,
28109 IX86_BUILTIN_VPERMDI256,
28110 IX86_BUILTIN_VPERMTI256,
28111 IX86_BUILTIN_VEXTRACT128I256,
28112 IX86_BUILTIN_VINSERT128I256,
28113 IX86_BUILTIN_MASKLOADD,
28114 IX86_BUILTIN_MASKLOADQ,
28115 IX86_BUILTIN_MASKLOADD256,
28116 IX86_BUILTIN_MASKLOADQ256,
28117 IX86_BUILTIN_MASKSTORED,
28118 IX86_BUILTIN_MASKSTOREQ,
28119 IX86_BUILTIN_MASKSTORED256,
28120 IX86_BUILTIN_MASKSTOREQ256,
28121 IX86_BUILTIN_PSLLVV4DI,
28122 IX86_BUILTIN_PSLLVV2DI,
28123 IX86_BUILTIN_PSLLVV8SI,
28124 IX86_BUILTIN_PSLLVV4SI,
28125 IX86_BUILTIN_PSRAVV8SI,
28126 IX86_BUILTIN_PSRAVV4SI,
28127 IX86_BUILTIN_PSRLVV4DI,
28128 IX86_BUILTIN_PSRLVV2DI,
28129 IX86_BUILTIN_PSRLVV8SI,
28130 IX86_BUILTIN_PSRLVV4SI,
28132 IX86_BUILTIN_GATHERSIV2DF,
28133 IX86_BUILTIN_GATHERSIV4DF,
28134 IX86_BUILTIN_GATHERDIV2DF,
28135 IX86_BUILTIN_GATHERDIV4DF,
28136 IX86_BUILTIN_GATHERSIV4SF,
28137 IX86_BUILTIN_GATHERSIV8SF,
28138 IX86_BUILTIN_GATHERDIV4SF,
28139 IX86_BUILTIN_GATHERDIV8SF,
28140 IX86_BUILTIN_GATHERSIV2DI,
28141 IX86_BUILTIN_GATHERSIV4DI,
28142 IX86_BUILTIN_GATHERDIV2DI,
28143 IX86_BUILTIN_GATHERDIV4DI,
28144 IX86_BUILTIN_GATHERSIV4SI,
28145 IX86_BUILTIN_GATHERSIV8SI,
28146 IX86_BUILTIN_GATHERDIV4SI,
28147 IX86_BUILTIN_GATHERDIV8SI,
28149 /* AVX512F */
28150 IX86_BUILTIN_SI512_SI256,
28151 IX86_BUILTIN_PD512_PD256,
28152 IX86_BUILTIN_PS512_PS256,
28153 IX86_BUILTIN_SI512_SI,
28154 IX86_BUILTIN_PD512_PD,
28155 IX86_BUILTIN_PS512_PS,
28156 IX86_BUILTIN_ADDPD512,
28157 IX86_BUILTIN_ADDPS512,
28158 IX86_BUILTIN_ADDSD_ROUND,
28159 IX86_BUILTIN_ADDSS_ROUND,
28160 IX86_BUILTIN_ALIGND512,
28161 IX86_BUILTIN_ALIGNQ512,
28162 IX86_BUILTIN_BLENDMD512,
28163 IX86_BUILTIN_BLENDMPD512,
28164 IX86_BUILTIN_BLENDMPS512,
28165 IX86_BUILTIN_BLENDMQ512,
28166 IX86_BUILTIN_BROADCASTF32X4_512,
28167 IX86_BUILTIN_BROADCASTF64X4_512,
28168 IX86_BUILTIN_BROADCASTI32X4_512,
28169 IX86_BUILTIN_BROADCASTI64X4_512,
28170 IX86_BUILTIN_BROADCASTSD512,
28171 IX86_BUILTIN_BROADCASTSS512,
28172 IX86_BUILTIN_CMPD512,
28173 IX86_BUILTIN_CMPPD512,
28174 IX86_BUILTIN_CMPPS512,
28175 IX86_BUILTIN_CMPQ512,
28176 IX86_BUILTIN_CMPSD_MASK,
28177 IX86_BUILTIN_CMPSS_MASK,
28178 IX86_BUILTIN_COMIDF,
28179 IX86_BUILTIN_COMISF,
28180 IX86_BUILTIN_COMPRESSPD512,
28181 IX86_BUILTIN_COMPRESSPDSTORE512,
28182 IX86_BUILTIN_COMPRESSPS512,
28183 IX86_BUILTIN_COMPRESSPSSTORE512,
28184 IX86_BUILTIN_CVTDQ2PD512,
28185 IX86_BUILTIN_CVTDQ2PS512,
28186 IX86_BUILTIN_CVTPD2DQ512,
28187 IX86_BUILTIN_CVTPD2PS512,
28188 IX86_BUILTIN_CVTPD2UDQ512,
28189 IX86_BUILTIN_CVTPH2PS512,
28190 IX86_BUILTIN_CVTPS2DQ512,
28191 IX86_BUILTIN_CVTPS2PD512,
28192 IX86_BUILTIN_CVTPS2PH512,
28193 IX86_BUILTIN_CVTPS2UDQ512,
28194 IX86_BUILTIN_CVTSD2SS_ROUND,
28195 IX86_BUILTIN_CVTSI2SD64,
28196 IX86_BUILTIN_CVTSI2SS32,
28197 IX86_BUILTIN_CVTSI2SS64,
28198 IX86_BUILTIN_CVTSS2SD_ROUND,
28199 IX86_BUILTIN_CVTTPD2DQ512,
28200 IX86_BUILTIN_CVTTPD2UDQ512,
28201 IX86_BUILTIN_CVTTPS2DQ512,
28202 IX86_BUILTIN_CVTTPS2UDQ512,
28203 IX86_BUILTIN_CVTUDQ2PD512,
28204 IX86_BUILTIN_CVTUDQ2PS512,
28205 IX86_BUILTIN_CVTUSI2SD32,
28206 IX86_BUILTIN_CVTUSI2SD64,
28207 IX86_BUILTIN_CVTUSI2SS32,
28208 IX86_BUILTIN_CVTUSI2SS64,
28209 IX86_BUILTIN_DIVPD512,
28210 IX86_BUILTIN_DIVPS512,
28211 IX86_BUILTIN_DIVSD_ROUND,
28212 IX86_BUILTIN_DIVSS_ROUND,
28213 IX86_BUILTIN_EXPANDPD512,
28214 IX86_BUILTIN_EXPANDPD512Z,
28215 IX86_BUILTIN_EXPANDPDLOAD512,
28216 IX86_BUILTIN_EXPANDPDLOAD512Z,
28217 IX86_BUILTIN_EXPANDPS512,
28218 IX86_BUILTIN_EXPANDPS512Z,
28219 IX86_BUILTIN_EXPANDPSLOAD512,
28220 IX86_BUILTIN_EXPANDPSLOAD512Z,
28221 IX86_BUILTIN_EXTRACTF32X4,
28222 IX86_BUILTIN_EXTRACTF64X4,
28223 IX86_BUILTIN_EXTRACTI32X4,
28224 IX86_BUILTIN_EXTRACTI64X4,
28225 IX86_BUILTIN_FIXUPIMMPD512_MASK,
28226 IX86_BUILTIN_FIXUPIMMPD512_MASKZ,
28227 IX86_BUILTIN_FIXUPIMMPS512_MASK,
28228 IX86_BUILTIN_FIXUPIMMPS512_MASKZ,
28229 IX86_BUILTIN_FIXUPIMMSD128_MASK,
28230 IX86_BUILTIN_FIXUPIMMSD128_MASKZ,
28231 IX86_BUILTIN_FIXUPIMMSS128_MASK,
28232 IX86_BUILTIN_FIXUPIMMSS128_MASKZ,
28233 IX86_BUILTIN_GETEXPPD512,
28234 IX86_BUILTIN_GETEXPPS512,
28235 IX86_BUILTIN_GETEXPSD128,
28236 IX86_BUILTIN_GETEXPSS128,
28237 IX86_BUILTIN_GETMANTPD512,
28238 IX86_BUILTIN_GETMANTPS512,
28239 IX86_BUILTIN_GETMANTSD128,
28240 IX86_BUILTIN_GETMANTSS128,
28241 IX86_BUILTIN_INSERTF32X4,
28242 IX86_BUILTIN_INSERTF64X4,
28243 IX86_BUILTIN_INSERTI32X4,
28244 IX86_BUILTIN_INSERTI64X4,
28245 IX86_BUILTIN_LOADAPD512,
28246 IX86_BUILTIN_LOADAPS512,
28247 IX86_BUILTIN_LOADDQUDI512,
28248 IX86_BUILTIN_LOADDQUSI512,
28249 IX86_BUILTIN_LOADUPD512,
28250 IX86_BUILTIN_LOADUPS512,
28251 IX86_BUILTIN_MAXPD512,
28252 IX86_BUILTIN_MAXPS512,
28253 IX86_BUILTIN_MAXSD_ROUND,
28254 IX86_BUILTIN_MAXSS_ROUND,
28255 IX86_BUILTIN_MINPD512,
28256 IX86_BUILTIN_MINPS512,
28257 IX86_BUILTIN_MINSD_ROUND,
28258 IX86_BUILTIN_MINSS_ROUND,
28259 IX86_BUILTIN_MOVAPD512,
28260 IX86_BUILTIN_MOVAPS512,
28261 IX86_BUILTIN_MOVDDUP512,
28262 IX86_BUILTIN_MOVDQA32LOAD512,
28263 IX86_BUILTIN_MOVDQA32STORE512,
28264 IX86_BUILTIN_MOVDQA32_512,
28265 IX86_BUILTIN_MOVDQA64LOAD512,
28266 IX86_BUILTIN_MOVDQA64STORE512,
28267 IX86_BUILTIN_MOVDQA64_512,
28268 IX86_BUILTIN_MOVNTDQ512,
28269 IX86_BUILTIN_MOVNTDQA512,
28270 IX86_BUILTIN_MOVNTPD512,
28271 IX86_BUILTIN_MOVNTPS512,
28272 IX86_BUILTIN_MOVSHDUP512,
28273 IX86_BUILTIN_MOVSLDUP512,
28274 IX86_BUILTIN_MULPD512,
28275 IX86_BUILTIN_MULPS512,
28276 IX86_BUILTIN_MULSD_ROUND,
28277 IX86_BUILTIN_MULSS_ROUND,
28278 IX86_BUILTIN_PABSD512,
28279 IX86_BUILTIN_PABSQ512,
28280 IX86_BUILTIN_PADDD512,
28281 IX86_BUILTIN_PADDQ512,
28282 IX86_BUILTIN_PANDD512,
28283 IX86_BUILTIN_PANDND512,
28284 IX86_BUILTIN_PANDNQ512,
28285 IX86_BUILTIN_PANDQ512,
28286 IX86_BUILTIN_PBROADCASTD512,
28287 IX86_BUILTIN_PBROADCASTD512_GPR,
28288 IX86_BUILTIN_PBROADCASTMB512,
28289 IX86_BUILTIN_PBROADCASTMW512,
28290 IX86_BUILTIN_PBROADCASTQ512,
28291 IX86_BUILTIN_PBROADCASTQ512_GPR,
28292 IX86_BUILTIN_PBROADCASTQ512_MEM,
28293 IX86_BUILTIN_PCMPEQD512_MASK,
28294 IX86_BUILTIN_PCMPEQQ512_MASK,
28295 IX86_BUILTIN_PCMPGTD512_MASK,
28296 IX86_BUILTIN_PCMPGTQ512_MASK,
28297 IX86_BUILTIN_PCOMPRESSD512,
28298 IX86_BUILTIN_PCOMPRESSDSTORE512,
28299 IX86_BUILTIN_PCOMPRESSQ512,
28300 IX86_BUILTIN_PCOMPRESSQSTORE512,
28301 IX86_BUILTIN_PEXPANDD512,
28302 IX86_BUILTIN_PEXPANDD512Z,
28303 IX86_BUILTIN_PEXPANDDLOAD512,
28304 IX86_BUILTIN_PEXPANDDLOAD512Z,
28305 IX86_BUILTIN_PEXPANDQ512,
28306 IX86_BUILTIN_PEXPANDQ512Z,
28307 IX86_BUILTIN_PEXPANDQLOAD512,
28308 IX86_BUILTIN_PEXPANDQLOAD512Z,
28309 IX86_BUILTIN_PMAXSD512,
28310 IX86_BUILTIN_PMAXSQ512,
28311 IX86_BUILTIN_PMAXUD512,
28312 IX86_BUILTIN_PMAXUQ512,
28313 IX86_BUILTIN_PMINSD512,
28314 IX86_BUILTIN_PMINSQ512,
28315 IX86_BUILTIN_PMINUD512,
28316 IX86_BUILTIN_PMINUQ512,
28317 IX86_BUILTIN_PMOVDB512,
28318 IX86_BUILTIN_PMOVDB512_MEM,
28319 IX86_BUILTIN_PMOVDW512,
28320 IX86_BUILTIN_PMOVDW512_MEM,
28321 IX86_BUILTIN_PMOVQB512,
28322 IX86_BUILTIN_PMOVQB512_MEM,
28323 IX86_BUILTIN_PMOVQD512,
28324 IX86_BUILTIN_PMOVQD512_MEM,
28325 IX86_BUILTIN_PMOVQW512,
28326 IX86_BUILTIN_PMOVQW512_MEM,
28327 IX86_BUILTIN_PMOVSDB512,
28328 IX86_BUILTIN_PMOVSDB512_MEM,
28329 IX86_BUILTIN_PMOVSDW512,
28330 IX86_BUILTIN_PMOVSDW512_MEM,
28331 IX86_BUILTIN_PMOVSQB512,
28332 IX86_BUILTIN_PMOVSQB512_MEM,
28333 IX86_BUILTIN_PMOVSQD512,
28334 IX86_BUILTIN_PMOVSQD512_MEM,
28335 IX86_BUILTIN_PMOVSQW512,
28336 IX86_BUILTIN_PMOVSQW512_MEM,
28337 IX86_BUILTIN_PMOVSXBD512,
28338 IX86_BUILTIN_PMOVSXBQ512,
28339 IX86_BUILTIN_PMOVSXDQ512,
28340 IX86_BUILTIN_PMOVSXWD512,
28341 IX86_BUILTIN_PMOVSXWQ512,
28342 IX86_BUILTIN_PMOVUSDB512,
28343 IX86_BUILTIN_PMOVUSDB512_MEM,
28344 IX86_BUILTIN_PMOVUSDW512,
28345 IX86_BUILTIN_PMOVUSDW512_MEM,
28346 IX86_BUILTIN_PMOVUSQB512,
28347 IX86_BUILTIN_PMOVUSQB512_MEM,
28348 IX86_BUILTIN_PMOVUSQD512,
28349 IX86_BUILTIN_PMOVUSQD512_MEM,
28350 IX86_BUILTIN_PMOVUSQW512,
28351 IX86_BUILTIN_PMOVUSQW512_MEM,
28352 IX86_BUILTIN_PMOVZXBD512,
28353 IX86_BUILTIN_PMOVZXBQ512,
28354 IX86_BUILTIN_PMOVZXDQ512,
28355 IX86_BUILTIN_PMOVZXWD512,
28356 IX86_BUILTIN_PMOVZXWQ512,
28357 IX86_BUILTIN_PMULDQ512,
28358 IX86_BUILTIN_PMULLD512,
28359 IX86_BUILTIN_PMULUDQ512,
28360 IX86_BUILTIN_PORD512,
28361 IX86_BUILTIN_PORQ512,
28362 IX86_BUILTIN_PROLD512,
28363 IX86_BUILTIN_PROLQ512,
28364 IX86_BUILTIN_PROLVD512,
28365 IX86_BUILTIN_PROLVQ512,
28366 IX86_BUILTIN_PRORD512,
28367 IX86_BUILTIN_PRORQ512,
28368 IX86_BUILTIN_PRORVD512,
28369 IX86_BUILTIN_PRORVQ512,
28370 IX86_BUILTIN_PSHUFD512,
28371 IX86_BUILTIN_PSLLD512,
28372 IX86_BUILTIN_PSLLDI512,
28373 IX86_BUILTIN_PSLLQ512,
28374 IX86_BUILTIN_PSLLQI512,
28375 IX86_BUILTIN_PSLLVV16SI,
28376 IX86_BUILTIN_PSLLVV8DI,
28377 IX86_BUILTIN_PSRAD512,
28378 IX86_BUILTIN_PSRADI512,
28379 IX86_BUILTIN_PSRAQ512,
28380 IX86_BUILTIN_PSRAQI512,
28381 IX86_BUILTIN_PSRAVV16SI,
28382 IX86_BUILTIN_PSRAVV8DI,
28383 IX86_BUILTIN_PSRLD512,
28384 IX86_BUILTIN_PSRLDI512,
28385 IX86_BUILTIN_PSRLQ512,
28386 IX86_BUILTIN_PSRLQI512,
28387 IX86_BUILTIN_PSRLVV16SI,
28388 IX86_BUILTIN_PSRLVV8DI,
28389 IX86_BUILTIN_PSUBD512,
28390 IX86_BUILTIN_PSUBQ512,
28391 IX86_BUILTIN_PTESTMD512,
28392 IX86_BUILTIN_PTESTMQ512,
28393 IX86_BUILTIN_PTESTNMD512,
28394 IX86_BUILTIN_PTESTNMQ512,
28395 IX86_BUILTIN_PUNPCKHDQ512,
28396 IX86_BUILTIN_PUNPCKHQDQ512,
28397 IX86_BUILTIN_PUNPCKLDQ512,
28398 IX86_BUILTIN_PUNPCKLQDQ512,
28399 IX86_BUILTIN_PXORD512,
28400 IX86_BUILTIN_PXORQ512,
28401 IX86_BUILTIN_RCP14PD512,
28402 IX86_BUILTIN_RCP14PS512,
28403 IX86_BUILTIN_RCP14SD,
28404 IX86_BUILTIN_RCP14SS,
28405 IX86_BUILTIN_RNDSCALEPD,
28406 IX86_BUILTIN_RNDSCALEPS,
28407 IX86_BUILTIN_RNDSCALESD,
28408 IX86_BUILTIN_RNDSCALESS,
28409 IX86_BUILTIN_RSQRT14PD512,
28410 IX86_BUILTIN_RSQRT14PS512,
28411 IX86_BUILTIN_RSQRT14SD,
28412 IX86_BUILTIN_RSQRT14SS,
28413 IX86_BUILTIN_SCALEFPD512,
28414 IX86_BUILTIN_SCALEFPS512,
28415 IX86_BUILTIN_SCALEFSD,
28416 IX86_BUILTIN_SCALEFSS,
28417 IX86_BUILTIN_SHUFPD512,
28418 IX86_BUILTIN_SHUFPS512,
28419 IX86_BUILTIN_SHUF_F32x4,
28420 IX86_BUILTIN_SHUF_F64x2,
28421 IX86_BUILTIN_SHUF_I32x4,
28422 IX86_BUILTIN_SHUF_I64x2,
28423 IX86_BUILTIN_SQRTPD512,
28424 IX86_BUILTIN_SQRTPD512_MASK,
28425 IX86_BUILTIN_SQRTPS512_MASK,
28426 IX86_BUILTIN_SQRTPS_NR512,
28427 IX86_BUILTIN_SQRTSD_ROUND,
28428 IX86_BUILTIN_SQRTSS_ROUND,
28429 IX86_BUILTIN_STOREAPD512,
28430 IX86_BUILTIN_STOREAPS512,
28431 IX86_BUILTIN_STOREDQUDI512,
28432 IX86_BUILTIN_STOREDQUSI512,
28433 IX86_BUILTIN_STOREUPD512,
28434 IX86_BUILTIN_STOREUPS512,
28435 IX86_BUILTIN_SUBPD512,
28436 IX86_BUILTIN_SUBPS512,
28437 IX86_BUILTIN_SUBSD_ROUND,
28438 IX86_BUILTIN_SUBSS_ROUND,
28439 IX86_BUILTIN_UCMPD512,
28440 IX86_BUILTIN_UCMPQ512,
28441 IX86_BUILTIN_UNPCKHPD512,
28442 IX86_BUILTIN_UNPCKHPS512,
28443 IX86_BUILTIN_UNPCKLPD512,
28444 IX86_BUILTIN_UNPCKLPS512,
28445 IX86_BUILTIN_VCVTSD2SI32,
28446 IX86_BUILTIN_VCVTSD2SI64,
28447 IX86_BUILTIN_VCVTSD2USI32,
28448 IX86_BUILTIN_VCVTSD2USI64,
28449 IX86_BUILTIN_VCVTSS2SI32,
28450 IX86_BUILTIN_VCVTSS2SI64,
28451 IX86_BUILTIN_VCVTSS2USI32,
28452 IX86_BUILTIN_VCVTSS2USI64,
28453 IX86_BUILTIN_VCVTTSD2SI32,
28454 IX86_BUILTIN_VCVTTSD2SI64,
28455 IX86_BUILTIN_VCVTTSD2USI32,
28456 IX86_BUILTIN_VCVTTSD2USI64,
28457 IX86_BUILTIN_VCVTTSS2SI32,
28458 IX86_BUILTIN_VCVTTSS2SI64,
28459 IX86_BUILTIN_VCVTTSS2USI32,
28460 IX86_BUILTIN_VCVTTSS2USI64,
28461 IX86_BUILTIN_VFMADDPD512_MASK,
28462 IX86_BUILTIN_VFMADDPD512_MASK3,
28463 IX86_BUILTIN_VFMADDPD512_MASKZ,
28464 IX86_BUILTIN_VFMADDPS512_MASK,
28465 IX86_BUILTIN_VFMADDPS512_MASK3,
28466 IX86_BUILTIN_VFMADDPS512_MASKZ,
28467 IX86_BUILTIN_VFMADDSD3_ROUND,
28468 IX86_BUILTIN_VFMADDSS3_ROUND,
28469 IX86_BUILTIN_VFMADDSUBPD512_MASK,
28470 IX86_BUILTIN_VFMADDSUBPD512_MASK3,
28471 IX86_BUILTIN_VFMADDSUBPD512_MASKZ,
28472 IX86_BUILTIN_VFMADDSUBPS512_MASK,
28473 IX86_BUILTIN_VFMADDSUBPS512_MASK3,
28474 IX86_BUILTIN_VFMADDSUBPS512_MASKZ,
28475 IX86_BUILTIN_VFMSUBADDPD512_MASK3,
28476 IX86_BUILTIN_VFMSUBADDPS512_MASK3,
28477 IX86_BUILTIN_VFMSUBPD512_MASK3,
28478 IX86_BUILTIN_VFMSUBPS512_MASK3,
28479 IX86_BUILTIN_VFMSUBSD3_MASK3,
28480 IX86_BUILTIN_VFMSUBSS3_MASK3,
28481 IX86_BUILTIN_VFNMADDPD512_MASK,
28482 IX86_BUILTIN_VFNMADDPS512_MASK,
28483 IX86_BUILTIN_VFNMSUBPD512_MASK,
28484 IX86_BUILTIN_VFNMSUBPD512_MASK3,
28485 IX86_BUILTIN_VFNMSUBPS512_MASK,
28486 IX86_BUILTIN_VFNMSUBPS512_MASK3,
28487 IX86_BUILTIN_VPCLZCNTD512,
28488 IX86_BUILTIN_VPCLZCNTQ512,
28489 IX86_BUILTIN_VPCONFLICTD512,
28490 IX86_BUILTIN_VPCONFLICTQ512,
28491 IX86_BUILTIN_VPERMDF512,
28492 IX86_BUILTIN_VPERMDI512,
28493 IX86_BUILTIN_VPERMI2VARD512,
28494 IX86_BUILTIN_VPERMI2VARPD512,
28495 IX86_BUILTIN_VPERMI2VARPS512,
28496 IX86_BUILTIN_VPERMI2VARQ512,
28497 IX86_BUILTIN_VPERMILPD512,
28498 IX86_BUILTIN_VPERMILPS512,
28499 IX86_BUILTIN_VPERMILVARPD512,
28500 IX86_BUILTIN_VPERMILVARPS512,
28501 IX86_BUILTIN_VPERMT2VARD512,
28502 IX86_BUILTIN_VPERMT2VARD512_MASKZ,
28503 IX86_BUILTIN_VPERMT2VARPD512,
28504 IX86_BUILTIN_VPERMT2VARPD512_MASKZ,
28505 IX86_BUILTIN_VPERMT2VARPS512,
28506 IX86_BUILTIN_VPERMT2VARPS512_MASKZ,
28507 IX86_BUILTIN_VPERMT2VARQ512,
28508 IX86_BUILTIN_VPERMT2VARQ512_MASKZ,
28509 IX86_BUILTIN_VPERMVARDF512,
28510 IX86_BUILTIN_VPERMVARDI512,
28511 IX86_BUILTIN_VPERMVARSF512,
28512 IX86_BUILTIN_VPERMVARSI512,
28513 IX86_BUILTIN_VTERNLOGD512_MASK,
28514 IX86_BUILTIN_VTERNLOGD512_MASKZ,
28515 IX86_BUILTIN_VTERNLOGQ512_MASK,
28516 IX86_BUILTIN_VTERNLOGQ512_MASKZ,
28518 /* Mask arithmetic operations */
28519 IX86_BUILTIN_KAND16,
28520 IX86_BUILTIN_KANDN16,
28521 IX86_BUILTIN_KNOT16,
28522 IX86_BUILTIN_KOR16,
28523 IX86_BUILTIN_KORTESTC16,
28524 IX86_BUILTIN_KORTESTZ16,
28525 IX86_BUILTIN_KUNPCKBW,
28526 IX86_BUILTIN_KXNOR16,
28527 IX86_BUILTIN_KXOR16,
28528 IX86_BUILTIN_KMOV16,
28530 /* Alternate 4 and 8 element gather/scatter for the vectorizer
28531 where all operands are 32-byte or 64-byte wide respectively. */
28532 IX86_BUILTIN_GATHERALTSIV4DF,
28533 IX86_BUILTIN_GATHERALTDIV8SF,
28534 IX86_BUILTIN_GATHERALTSIV4DI,
28535 IX86_BUILTIN_GATHERALTDIV8SI,
28536 IX86_BUILTIN_GATHER3ALTDIV16SF,
28537 IX86_BUILTIN_GATHER3ALTDIV16SI,
28538 IX86_BUILTIN_GATHER3ALTSIV8DF,
28539 IX86_BUILTIN_GATHER3ALTSIV8DI,
28540 IX86_BUILTIN_GATHER3DIV16SF,
28541 IX86_BUILTIN_GATHER3DIV16SI,
28542 IX86_BUILTIN_GATHER3DIV8DF,
28543 IX86_BUILTIN_GATHER3DIV8DI,
28544 IX86_BUILTIN_GATHER3SIV16SF,
28545 IX86_BUILTIN_GATHER3SIV16SI,
28546 IX86_BUILTIN_GATHER3SIV8DF,
28547 IX86_BUILTIN_GATHER3SIV8DI,
28548 IX86_BUILTIN_SCATTERDIV16SF,
28549 IX86_BUILTIN_SCATTERDIV16SI,
28550 IX86_BUILTIN_SCATTERDIV8DF,
28551 IX86_BUILTIN_SCATTERDIV8DI,
28552 IX86_BUILTIN_SCATTERSIV16SF,
28553 IX86_BUILTIN_SCATTERSIV16SI,
28554 IX86_BUILTIN_SCATTERSIV8DF,
28555 IX86_BUILTIN_SCATTERSIV8DI,
28557 /* AVX512PF */
28558 IX86_BUILTIN_GATHERPFQPD,
28559 IX86_BUILTIN_GATHERPFDPS,
28560 IX86_BUILTIN_GATHERPFDPD,
28561 IX86_BUILTIN_GATHERPFQPS,
28562 IX86_BUILTIN_SCATTERPFDPD,
28563 IX86_BUILTIN_SCATTERPFDPS,
28564 IX86_BUILTIN_SCATTERPFQPD,
28565 IX86_BUILTIN_SCATTERPFQPS,
28567 /* AVX-512ER */
28568 IX86_BUILTIN_EXP2PD_MASK,
28569 IX86_BUILTIN_EXP2PS_MASK,
28570 IX86_BUILTIN_EXP2PS,
28571 IX86_BUILTIN_RCP28PD,
28572 IX86_BUILTIN_RCP28PS,
28573 IX86_BUILTIN_RCP28SD,
28574 IX86_BUILTIN_RCP28SS,
28575 IX86_BUILTIN_RSQRT28PD,
28576 IX86_BUILTIN_RSQRT28PS,
28577 IX86_BUILTIN_RSQRT28SD,
28578 IX86_BUILTIN_RSQRT28SS,
28580 /* SHA builtins. */
28581 IX86_BUILTIN_SHA1MSG1,
28582 IX86_BUILTIN_SHA1MSG2,
28583 IX86_BUILTIN_SHA1NEXTE,
28584 IX86_BUILTIN_SHA1RNDS4,
28585 IX86_BUILTIN_SHA256MSG1,
28586 IX86_BUILTIN_SHA256MSG2,
28587 IX86_BUILTIN_SHA256RNDS2,
28589 /* CLFLUSHOPT instructions. */
28590 IX86_BUILTIN_CLFLUSHOPT,
28592 /* TFmode support builtins. */
28593 IX86_BUILTIN_INFQ,
28594 IX86_BUILTIN_HUGE_VALQ,
28595 IX86_BUILTIN_FABSQ,
28596 IX86_BUILTIN_COPYSIGNQ,
28598 /* Vectorizer support builtins. */
28599 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512,
28600 IX86_BUILTIN_CPYSGNPS,
28601 IX86_BUILTIN_CPYSGNPD,
28602 IX86_BUILTIN_CPYSGNPS256,
28603 IX86_BUILTIN_CPYSGNPS512,
28604 IX86_BUILTIN_CPYSGNPD256,
28605 IX86_BUILTIN_CPYSGNPD512,
28606 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512,
28607 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512,
28610 /* FMA4 instructions. */
28611 IX86_BUILTIN_VFMADDSS,
28612 IX86_BUILTIN_VFMADDSD,
28613 IX86_BUILTIN_VFMADDPS,
28614 IX86_BUILTIN_VFMADDPD,
28615 IX86_BUILTIN_VFMADDPS256,
28616 IX86_BUILTIN_VFMADDPD256,
28617 IX86_BUILTIN_VFMADDSUBPS,
28618 IX86_BUILTIN_VFMADDSUBPD,
28619 IX86_BUILTIN_VFMADDSUBPS256,
28620 IX86_BUILTIN_VFMADDSUBPD256,
28622 /* FMA3 instructions. */
28623 IX86_BUILTIN_VFMADDSS3,
28624 IX86_BUILTIN_VFMADDSD3,
28626 /* XOP instructions. */
28627 IX86_BUILTIN_VPCMOV,
28628 IX86_BUILTIN_VPCMOV_V2DI,
28629 IX86_BUILTIN_VPCMOV_V4SI,
28630 IX86_BUILTIN_VPCMOV_V8HI,
28631 IX86_BUILTIN_VPCMOV_V16QI,
28632 IX86_BUILTIN_VPCMOV_V4SF,
28633 IX86_BUILTIN_VPCMOV_V2DF,
28634 IX86_BUILTIN_VPCMOV256,
28635 IX86_BUILTIN_VPCMOV_V4DI256,
28636 IX86_BUILTIN_VPCMOV_V8SI256,
28637 IX86_BUILTIN_VPCMOV_V16HI256,
28638 IX86_BUILTIN_VPCMOV_V32QI256,
28639 IX86_BUILTIN_VPCMOV_V8SF256,
28640 IX86_BUILTIN_VPCMOV_V4DF256,
28642 IX86_BUILTIN_VPPERM,
28644 IX86_BUILTIN_VPMACSSWW,
28645 IX86_BUILTIN_VPMACSWW,
28646 IX86_BUILTIN_VPMACSSWD,
28647 IX86_BUILTIN_VPMACSWD,
28648 IX86_BUILTIN_VPMACSSDD,
28649 IX86_BUILTIN_VPMACSDD,
28650 IX86_BUILTIN_VPMACSSDQL,
28651 IX86_BUILTIN_VPMACSSDQH,
28652 IX86_BUILTIN_VPMACSDQL,
28653 IX86_BUILTIN_VPMACSDQH,
28654 IX86_BUILTIN_VPMADCSSWD,
28655 IX86_BUILTIN_VPMADCSWD,
28657 IX86_BUILTIN_VPHADDBW,
28658 IX86_BUILTIN_VPHADDBD,
28659 IX86_BUILTIN_VPHADDBQ,
28660 IX86_BUILTIN_VPHADDWD,
28661 IX86_BUILTIN_VPHADDWQ,
28662 IX86_BUILTIN_VPHADDDQ,
28663 IX86_BUILTIN_VPHADDUBW,
28664 IX86_BUILTIN_VPHADDUBD,
28665 IX86_BUILTIN_VPHADDUBQ,
28666 IX86_BUILTIN_VPHADDUWD,
28667 IX86_BUILTIN_VPHADDUWQ,
28668 IX86_BUILTIN_VPHADDUDQ,
28669 IX86_BUILTIN_VPHSUBBW,
28670 IX86_BUILTIN_VPHSUBWD,
28671 IX86_BUILTIN_VPHSUBDQ,
28673 IX86_BUILTIN_VPROTB,
28674 IX86_BUILTIN_VPROTW,
28675 IX86_BUILTIN_VPROTD,
28676 IX86_BUILTIN_VPROTQ,
28677 IX86_BUILTIN_VPROTB_IMM,
28678 IX86_BUILTIN_VPROTW_IMM,
28679 IX86_BUILTIN_VPROTD_IMM,
28680 IX86_BUILTIN_VPROTQ_IMM,
28682 IX86_BUILTIN_VPSHLB,
28683 IX86_BUILTIN_VPSHLW,
28684 IX86_BUILTIN_VPSHLD,
28685 IX86_BUILTIN_VPSHLQ,
28686 IX86_BUILTIN_VPSHAB,
28687 IX86_BUILTIN_VPSHAW,
28688 IX86_BUILTIN_VPSHAD,
28689 IX86_BUILTIN_VPSHAQ,
28691 IX86_BUILTIN_VFRCZSS,
28692 IX86_BUILTIN_VFRCZSD,
28693 IX86_BUILTIN_VFRCZPS,
28694 IX86_BUILTIN_VFRCZPD,
28695 IX86_BUILTIN_VFRCZPS256,
28696 IX86_BUILTIN_VFRCZPD256,
28698 IX86_BUILTIN_VPCOMEQUB,
28699 IX86_BUILTIN_VPCOMNEUB,
28700 IX86_BUILTIN_VPCOMLTUB,
28701 IX86_BUILTIN_VPCOMLEUB,
28702 IX86_BUILTIN_VPCOMGTUB,
28703 IX86_BUILTIN_VPCOMGEUB,
28704 IX86_BUILTIN_VPCOMFALSEUB,
28705 IX86_BUILTIN_VPCOMTRUEUB,
28707 IX86_BUILTIN_VPCOMEQUW,
28708 IX86_BUILTIN_VPCOMNEUW,
28709 IX86_BUILTIN_VPCOMLTUW,
28710 IX86_BUILTIN_VPCOMLEUW,
28711 IX86_BUILTIN_VPCOMGTUW,
28712 IX86_BUILTIN_VPCOMGEUW,
28713 IX86_BUILTIN_VPCOMFALSEUW,
28714 IX86_BUILTIN_VPCOMTRUEUW,
28716 IX86_BUILTIN_VPCOMEQUD,
28717 IX86_BUILTIN_VPCOMNEUD,
28718 IX86_BUILTIN_VPCOMLTUD,
28719 IX86_BUILTIN_VPCOMLEUD,
28720 IX86_BUILTIN_VPCOMGTUD,
28721 IX86_BUILTIN_VPCOMGEUD,
28722 IX86_BUILTIN_VPCOMFALSEUD,
28723 IX86_BUILTIN_VPCOMTRUEUD,
28725 IX86_BUILTIN_VPCOMEQUQ,
28726 IX86_BUILTIN_VPCOMNEUQ,
28727 IX86_BUILTIN_VPCOMLTUQ,
28728 IX86_BUILTIN_VPCOMLEUQ,
28729 IX86_BUILTIN_VPCOMGTUQ,
28730 IX86_BUILTIN_VPCOMGEUQ,
28731 IX86_BUILTIN_VPCOMFALSEUQ,
28732 IX86_BUILTIN_VPCOMTRUEUQ,
28734 IX86_BUILTIN_VPCOMEQB,
28735 IX86_BUILTIN_VPCOMNEB,
28736 IX86_BUILTIN_VPCOMLTB,
28737 IX86_BUILTIN_VPCOMLEB,
28738 IX86_BUILTIN_VPCOMGTB,
28739 IX86_BUILTIN_VPCOMGEB,
28740 IX86_BUILTIN_VPCOMFALSEB,
28741 IX86_BUILTIN_VPCOMTRUEB,
28743 IX86_BUILTIN_VPCOMEQW,
28744 IX86_BUILTIN_VPCOMNEW,
28745 IX86_BUILTIN_VPCOMLTW,
28746 IX86_BUILTIN_VPCOMLEW,
28747 IX86_BUILTIN_VPCOMGTW,
28748 IX86_BUILTIN_VPCOMGEW,
28749 IX86_BUILTIN_VPCOMFALSEW,
28750 IX86_BUILTIN_VPCOMTRUEW,
28752 IX86_BUILTIN_VPCOMEQD,
28753 IX86_BUILTIN_VPCOMNED,
28754 IX86_BUILTIN_VPCOMLTD,
28755 IX86_BUILTIN_VPCOMLED,
28756 IX86_BUILTIN_VPCOMGTD,
28757 IX86_BUILTIN_VPCOMGED,
28758 IX86_BUILTIN_VPCOMFALSED,
28759 IX86_BUILTIN_VPCOMTRUED,
28761 IX86_BUILTIN_VPCOMEQQ,
28762 IX86_BUILTIN_VPCOMNEQ,
28763 IX86_BUILTIN_VPCOMLTQ,
28764 IX86_BUILTIN_VPCOMLEQ,
28765 IX86_BUILTIN_VPCOMGTQ,
28766 IX86_BUILTIN_VPCOMGEQ,
28767 IX86_BUILTIN_VPCOMFALSEQ,
28768 IX86_BUILTIN_VPCOMTRUEQ,
28770 /* LWP instructions. */
28771 IX86_BUILTIN_LLWPCB,
28772 IX86_BUILTIN_SLWPCB,
28773 IX86_BUILTIN_LWPVAL32,
28774 IX86_BUILTIN_LWPVAL64,
28775 IX86_BUILTIN_LWPINS32,
28776 IX86_BUILTIN_LWPINS64,
28778 IX86_BUILTIN_CLZS,
28780 /* RTM */
28781 IX86_BUILTIN_XBEGIN,
28782 IX86_BUILTIN_XEND,
28783 IX86_BUILTIN_XABORT,
28784 IX86_BUILTIN_XTEST,
28786 /* BMI instructions. */
28787 IX86_BUILTIN_BEXTR32,
28788 IX86_BUILTIN_BEXTR64,
28789 IX86_BUILTIN_CTZS,
28791 /* TBM instructions. */
28792 IX86_BUILTIN_BEXTRI32,
28793 IX86_BUILTIN_BEXTRI64,
28795 /* BMI2 instructions. */
28796 IX86_BUILTIN_BZHI32,
28797 IX86_BUILTIN_BZHI64,
28798 IX86_BUILTIN_PDEP32,
28799 IX86_BUILTIN_PDEP64,
28800 IX86_BUILTIN_PEXT32,
28801 IX86_BUILTIN_PEXT64,
28803 /* ADX instructions. */
28804 IX86_BUILTIN_ADDCARRYX32,
28805 IX86_BUILTIN_ADDCARRYX64,
28807 /* FSGSBASE instructions. */
28808 IX86_BUILTIN_RDFSBASE32,
28809 IX86_BUILTIN_RDFSBASE64,
28810 IX86_BUILTIN_RDGSBASE32,
28811 IX86_BUILTIN_RDGSBASE64,
28812 IX86_BUILTIN_WRFSBASE32,
28813 IX86_BUILTIN_WRFSBASE64,
28814 IX86_BUILTIN_WRGSBASE32,
28815 IX86_BUILTIN_WRGSBASE64,
28817 /* RDRND instructions. */
28818 IX86_BUILTIN_RDRAND16_STEP,
28819 IX86_BUILTIN_RDRAND32_STEP,
28820 IX86_BUILTIN_RDRAND64_STEP,
28822 /* RDSEED instructions. */
28823 IX86_BUILTIN_RDSEED16_STEP,
28824 IX86_BUILTIN_RDSEED32_STEP,
28825 IX86_BUILTIN_RDSEED64_STEP,
28827 /* F16C instructions. */
28828 IX86_BUILTIN_CVTPH2PS,
28829 IX86_BUILTIN_CVTPH2PS256,
28830 IX86_BUILTIN_CVTPS2PH,
28831 IX86_BUILTIN_CVTPS2PH256,
28833 /* CFString built-in for darwin */
28834 IX86_BUILTIN_CFSTRING,
28836 /* Builtins to get CPU type and supported features. */
28837 IX86_BUILTIN_CPU_INIT,
28838 IX86_BUILTIN_CPU_IS,
28839 IX86_BUILTIN_CPU_SUPPORTS,
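/* Illustrative note, assuming the standard user-level spellings: these codes
back the __builtin_cpu_init (), __builtin_cpu_is ("...") and
__builtin_cpu_supports ("...") interfaces documented in the GCC manual, e.g.
(use_avx2_path is a hypothetical user function):
if (__builtin_cpu_supports ("avx2"))
use_avx2_path ();  */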
28841 /* Read/write FLAGS register built-ins. */
28842 IX86_BUILTIN_READ_FLAGS,
28843 IX86_BUILTIN_WRITE_FLAGS,
28845 IX86_BUILTIN_MAX
28846 };
28848 /* Table for the ix86 builtin decls. */
28849 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
28851 /* Table of all of the builtin functions that are possible with different ISAs
28852 but are waiting to be built until a function is declared to use that
28853 ISA. */
28854 struct builtin_isa {
28855 const char *name; /* function name */
28856 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
28857 HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
28858 bool const_p; /* true if the declaration is constant */
28859 bool set_and_not_built_p;
28860 };
28862 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
28865 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
28866 of which isa_flags to use in the ix86_builtins_isa array. Stores the
28867 function decl in the ix86_builtins array. Returns the function decl, or
28868 NULL_TREE if the builtin was not added.
28870 If the front end has a special hook for builtin functions, delay adding
28871 builtin functions that aren't in the current ISA until the ISA is changed
28872 with function specific optimization. Doing so can save about 300K for the
28873 default compiler. When the builtin is expanded, check at that time whether
28874 it is valid.
28876 If the front end doesn't have a special hook, record all builtins, even if
28877 their instruction set isn't in the current ISA, in case the user uses
28878 function specific options for a different ISA, so that we don't get scope
28879 errors if a builtin is added in the middle of a function scope. */
28881 static inline tree
28882 def_builtin (HOST_WIDE_INT mask, const char *name,
28883 enum ix86_builtin_func_type tcode,
28884 enum ix86_builtins code)
28885 {
28886 tree decl = NULL_TREE;
28888 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
28889 {
28890 ix86_builtins_isa[(int) code].isa = mask;
28892 mask &= ~OPTION_MASK_ISA_64BIT;
28893 if (mask == 0
28894 || (mask & ix86_isa_flags) != 0
28895 || (lang_hooks.builtin_function
28896 == lang_hooks.builtin_function_ext_scope))
28898 {
28899 tree type = ix86_get_builtin_func_type (tcode);
28900 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
28901 NULL, NULL_TREE);
28902 ix86_builtins[(int) code] = decl;
28903 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
28904 }
28905 else
28906 {
28907 ix86_builtins[(int) code] = NULL_TREE;
28908 ix86_builtins_isa[(int) code].tcode = tcode;
28909 ix86_builtins_isa[(int) code].name = name;
28910 ix86_builtins_isa[(int) code].const_p = false;
28911 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
28912 }
28913 }
28915 return decl;
28916 }
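/* Illustrative sketch, not a call that appears at this spot in the file: a
typical registration through this interface.  The mask, name, type and code
mirror the existing __builtin_ia32_addps row in bdesc_args further down, so
the values are real, but the direct call is hypothetical:
def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_addps",
V4SF_FTYPE_V4SF_V4SF, IX86_BUILTIN_ADDPS);
If OPTION_MASK_ISA_SSE is not yet in ix86_isa_flags and the front end has a
separate ext-scope hook, nothing is built at that point; the name, type and
mask are only recorded in ix86_builtins_isa, and the decl is created later by
ix86_add_new_builtins.  */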
28918 /* Like def_builtin, but also marks the function decl "const". */
28920 static inline tree
28921 def_builtin_const (HOST_WIDE_INT mask, const char *name,
28922 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
28923 {
28924 tree decl = def_builtin (mask, name, tcode, code);
28925 if (decl)
28926 TREE_READONLY (decl) = 1;
28927 else
28928 ix86_builtins_isa[(int) code].const_p = true;
28930 return decl;
28931 }
28933 /* Add any new builtin functions for a given ISA that may not have been
28934 declared. This saves a bit of space compared to adding all of the
28935 declarations to the tree, even when they are not used. */
28937 static void
28938 ix86_add_new_builtins (HOST_WIDE_INT isa)
28939 {
28940 int i;
28942 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
28943 {
28944 if ((ix86_builtins_isa[i].isa & isa) != 0
28945 && ix86_builtins_isa[i].set_and_not_built_p)
28946 {
28947 tree decl, type;
28949 /* Don't define the builtin again. */
28950 ix86_builtins_isa[i].set_and_not_built_p = false;
28952 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
28953 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
28954 type, i, BUILT_IN_MD, NULL,
28955 NULL_TREE);
28957 ix86_builtins[i] = decl;
28958 if (ix86_builtins_isa[i].const_p)
28959 TREE_READONLY (decl) = 1;
28960 }
28961 }
28962 }
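/* Illustrative sketch of how the deferred path gets completed: when option or
target-attribute processing enables additional ISA bits, a caller passes the
updated flags (the call site shown here is an assumption for illustration,
not a quote of this file):
ix86_add_new_builtins (ix86_isa_flags);
Every entry whose recorded mask intersects ISA and which is still marked
set_and_not_built_p then has its decl created at extern scope.  */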
28964 /* Bits for builtin_description.flag. */
28966 /* Set when we don't support the comparison natively, and should
28967 swap_comparison in order to support it. */
28968 #define BUILTIN_DESC_SWAP_OPERANDS 1
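/* Illustrative example, assuming a hypothetical entry: a comparison builtin
with no native "less-than" pattern could be described with the "greater-than"
pattern plus BUILTIN_DESC_SWAP_OPERANDS, so that expansion swaps the two
operands before emitting the comparison.  */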
28970 struct builtin_description
28971 {
28972 const HOST_WIDE_INT mask;
28973 const enum insn_code icode;
28974 const char *const name;
28975 const enum ix86_builtins code;
28976 const enum rtx_code comparison;
28977 const int flag;
28978 };
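/* Illustrative reading of one row of the tables below, using the first
bdesc_comi entry (values copied verbatim from it):
{ OPTION_MASK_ISA_SSE,       ISA mask the builtin requires
CODE_FOR_sse_comi,         insn pattern used to expand it
"__builtin_ia32_comieq",   user-visible builtin name
IX86_BUILTIN_COMIEQSS,     enum ix86_builtins code
UNEQ,                      comparison rtx code (UNKNOWN when unused)
0 },                       flag bits; the later bdesc_special_args and
bdesc_args tables reuse this field for the builtin's
ix86_builtin_func_type cast to int.  */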
28980 static const struct builtin_description bdesc_comi[] =
28981 {
28982 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
28983 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
28984 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
28985 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
28986 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
28987 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
28988 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
28989 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
28990 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
28991 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
28992 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
28993 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
28994 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
28995 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
28996 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
28997 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
28998 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
28999 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
29000 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
29001 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
29002 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
29003 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
29004 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
29005 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
29006 };
29008 static const struct builtin_description bdesc_pcmpestr[] =
29009 {
29010 /* SSE4.2 */
29011 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
29012 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
29013 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
29014 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
29015 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
29016 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
29017 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
29018 };
29020 static const struct builtin_description bdesc_pcmpistr[] =
29021 {
29022 /* SSE4.2 */
29023 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
29024 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
29025 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
29026 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
29027 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
29028 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
29029 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
29030 };
29032 /* Special builtins with variable number of arguments. */
29033 static const struct builtin_description bdesc_special_args[] =
29034 {
29035 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
29036 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
29037 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
29039 /* 80387 (for use internally for atomic compound assignment). */
29040 { 0, CODE_FOR_fnstenv, "__builtin_ia32_fnstenv", IX86_BUILTIN_FNSTENV, UNKNOWN, (int) VOID_FTYPE_PVOID },
29041 { 0, CODE_FOR_fldenv, "__builtin_ia32_fldenv", IX86_BUILTIN_FLDENV, UNKNOWN, (int) VOID_FTYPE_PCVOID },
29042 { 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKNOWN, (int) USHORT_FTYPE_VOID },
29043 { 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, UNKNOWN, (int) VOID_FTYPE_VOID },
29045 /* MMX */
29046 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
29048 /* 3DNow! */
29049 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
29051 /* FXSR, XSAVE, XSAVEOPT, XSAVEC and XSAVES. */
29052 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID },
29053 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID },
29054 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29055 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29056 { OPTION_MASK_ISA_XSAVEOPT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29057 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xsaves", IX86_BUILTIN_XSAVES, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29058 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xrstors", IX86_BUILTIN_XRSTORS, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29059 { OPTION_MASK_ISA_XSAVEC, CODE_FOR_nothing, "__builtin_ia32_xsavec", IX86_BUILTIN_XSAVEC, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29061 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID },
29062 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID },
29063 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29064 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29065 { OPTION_MASK_ISA_XSAVEOPT | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29066 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaves64", IX86_BUILTIN_XSAVES64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29067 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstors64", IX86_BUILTIN_XRSTORS64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29068 { OPTION_MASK_ISA_XSAVEC | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsavec64", IX86_BUILTIN_XSAVEC64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29070 /* SSE */
29071 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
29072 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
29073 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
29075 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
29076 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
29077 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
29078 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
29080 /* SSE or 3DNow!A */
29081 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
29082 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
29084 /* SSE2 */
29085 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
29086 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
29087 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
29088 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedquv16qi, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
29089 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
29090 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
29091 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
29092 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
29093 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
29094 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddquv16qi, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
29096 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
29097 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
29099 /* SSE3 */
29100 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
29102 /* SSE4.1 */
29103 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
29105 /* SSE4A */
29106 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
29107 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
29109 /* AVX */
29110 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
29111 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
29113 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
29114 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
29115 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
29116 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
29117 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
29119 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
29120 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
29121 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
29122 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
29123 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddquv32qi, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
29124 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedquv32qi, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
29125 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
29127 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
29128 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
29129 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
29131 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
29132 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
29133 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
29134 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
29135 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
29136 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
29137 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
29138 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
29140 /* AVX2 */
29141 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
29142 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
29143 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
29144 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
29145 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
29146 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
29147 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
29148 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
29149 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
29151 /* AVX512F */
29152 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29153 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29154 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29155 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29156 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29157 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29158 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29159 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29160 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29161 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29162 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29163 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29164 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29165 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29166 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadupd512_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29167 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadups512_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29168 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29169 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29170 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29171 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29172 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF },
29173 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF },
29174 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI },
29175 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI },
29176 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29177 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29178 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeupd512_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29179 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask_store, "__builtin_ia32_pmovusqd512mem_mask", IX86_BUILTIN_PMOVUSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29180 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask_store, "__builtin_ia32_pmovsqd512mem_mask", IX86_BUILTIN_PMOVSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29181 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask_store, "__builtin_ia32_pmovqd512mem_mask", IX86_BUILTIN_PMOVQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29182 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovusqw512mem_mask", IX86_BUILTIN_PMOVUSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29183 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovsqw512mem_mask", IX86_BUILTIN_PMOVSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29184 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovqw512mem_mask", IX86_BUILTIN_PMOVQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29185 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovusdw512mem_mask", IX86_BUILTIN_PMOVUSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29186 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovsdw512mem_mask", IX86_BUILTIN_PMOVSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29187 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovdw512mem_mask", IX86_BUILTIN_PMOVDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29188 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovqb512mem_mask", IX86_BUILTIN_PMOVQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29189 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovusqb512mem_mask", IX86_BUILTIN_PMOVUSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29190 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovsqb512mem_mask", IX86_BUILTIN_PMOVSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29191 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29192 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29193 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29194 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeups512_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29195 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29196 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29197 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29198 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29200 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
29201 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
29202 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
29203 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
29204 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
29205 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
29207 /* FSGSBASE */
29208 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29209 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
29210 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29211 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
29212 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
29213 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
29214 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
29215 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
29217 /* RTM */
29218 { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29219 { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
29220 { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
29221 };
29223 /* Builtins with variable number of arguments. */
29224 static const struct builtin_description bdesc_args[] =
29225 {
29226 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
29227 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
29228 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
29229 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
29230 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
29231 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
29232 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
29234 /* MMX */
29235 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29236 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29237 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29238 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29239 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29240 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29242 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29243 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29244 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29245 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29246 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29247 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29248 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29249 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29251 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29252 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29254 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29255 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29256 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29257 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29259 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29260 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29261 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29262 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29263 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29264 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29266 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29267 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29268 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29269 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29270 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
29271 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
29273 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
29274 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
29275 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
29277 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
29279 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29280 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29281 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
29282 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29283 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29284 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
29286 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29287 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29288 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
29289 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29290 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29291 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
29293 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29294 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29295 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29296 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29298 /* 3DNow! */
29299 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
29300 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
29301 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29302 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29304 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29305 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29306 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29307 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29308 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29309 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29310 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29311 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29312 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29313 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29314 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29315 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29316 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29317 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29318 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29320 /* 3DNow!A */
29321 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
29322 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
29323 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
29324 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29325 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29326 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29328 /* SSE */
29329 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
29330 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29331 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29332 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29333 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29334 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29335 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
29336 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
29337 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
29338 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
29339 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
29340 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
29342 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29344 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29345 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29346 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29347 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29348 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29349 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29350 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29351 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29353 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
29354 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
29355 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
29356 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29357 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29358 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29359 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
29360 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
29361 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
29362 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29363   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29364 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29365 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
29366 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
29367 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
29368 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29369 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
29370 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
29371 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
29372 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
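/* All of the cmp* builtins above expand through the same
   sse_maskcmpv4sf3 (packed) or sse_vmmaskcmpv4sf3 (scalar) pattern;
   the rtx comparison code in the fifth field selects the predicate,
   and the _SWAP signatures exchange the two operands, so e.g. cmpgtps
   is emitted as cmpltps with swapped arguments.  */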
29374 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29375 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29376 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29377 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29379 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29380 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29381 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29382 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29384 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29386 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29387 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29388 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29389 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29390 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29392 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
29393 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
29394   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
29396 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
29398 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
29399 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
29400 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
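/* The _VEC_MERGE signatures above take a single V4SF argument that is
   used both as the operand of the scalar operation and as the source
   of the untouched upper elements.  */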
29402 { OPTION_MASK_ISA_SSE, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
29403 { OPTION_MASK_ISA_SSE, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
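/* Entries with a null name, such as the two above and the AES/PCLMUL
   ones further down, are skipped when this table is walked to register
   the builtins; those builtins are declared elsewhere but are still
   expanded through this table.  */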
29405   /* SSE MMX or 3DNow!A */
29406 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29407 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29408 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29410 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29411 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29412 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29413 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29415 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
29416 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
29418 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
29420 /* SSE2 */
29421 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29423 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
29424 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
29425 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
29426 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
29427 { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
29429 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
29430 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
29431 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
29432 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
29433 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
29435 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
29437 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
29438 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
29439 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
29440 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
29442 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_fix_notruncv4sfv4si, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
29443 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
29444 { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
29446 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29447 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29448 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29449 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29450 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29451 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29452 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29453 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29455 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
29456 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
29457 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
29458 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29459   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29460 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29461 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
29462 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
29463 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
29464 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29465 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29466 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29467 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
29468 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
29469 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
29470 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29471 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
29472 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
29473 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
29474 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29476 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29477 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29478 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29479 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29481 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29482 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29483 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29484 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29486 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29488 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29489 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29490 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29492 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
29494 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29495 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29496 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29497 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29498 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29499 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29500 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29501 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29503 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29504 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29505 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29506 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29507 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29508 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29509 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29510 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29512 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29513   { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29515 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29516 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29517 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29518 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29520 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29521 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29523 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29524 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29525 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29526 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29527 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29528 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29530 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29531 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29532 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29533 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29535 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29536 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29537 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29538 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29539 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29540 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29541 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29542 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29544 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
29545 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
29546 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
29548 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29549 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
29551 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
29552 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
29554 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
29556 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
29557 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
29558 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
29559 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
29561 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
29562 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29563 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29564 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
29565 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29566 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
29567 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
29569 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
29570 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29571 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29572 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
29573 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29574 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
29575 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
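/* The _INT_CONVERT signatures (pslldqi128 and psrldqi128 above) keep
   the V2DI prototype of the builtin but have their vector operands
   converted in ix86_expand_args_builtin to the mode of the underlying
   pattern (V1TImode here); the count operand must be an immediate.  */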
29577 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29578 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29579 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29580 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
29582 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
29583 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
29584 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
29586 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
29588   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
29590 /* SSE2 MMX */
29591 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
29592 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
29594 /* SSE3 */
29595   { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29596 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29598 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29599 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29600 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29601 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29602 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29603 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29605 /* SSSE3 */
29606 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
29607 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
29608 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
29609 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
29610 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
29611 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
29613 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29614 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29615 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29616 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29617 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29618 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29619 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29620 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29621 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29622 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29623 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29624 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29625 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
29626 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
29627 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29628 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29629 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29630 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29631 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29632 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29633 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29634 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29635 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29636 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29638 /* SSSE3. */
29639 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
29640 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
29642 /* SSE4.1 */
29643 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29644 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29645 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
29646 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
29647 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29648 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29649 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29650 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
29651 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
29652 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
29654 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
29655 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
29656 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
29657 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
29658 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
29659 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
29660 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
29661 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
29662 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
29663 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
29664 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
29665 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
29666 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
29668 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
29669 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29670 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29671 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29672 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29673 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29674 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29675 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29676 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29677 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29678 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
29679 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29681 /* SSE4.1 */
29682 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
29683 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
29684 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29685 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
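/* For the floor/ceil/trunc/rint variants below, the rounding-mode
   constant (ROUND_FLOOR, ROUND_CEIL, ...) is carried in the
   comparison-code field and supplies the immediate operand of the
   round instruction when the builtin is expanded.  */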
29687 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
29688 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
29689 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
29690 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
29692 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
29693 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
29695 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
29696 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
29698 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
29699 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
29700 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
29701 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
29703 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
29704 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },
29706 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29707 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
29709 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
29710 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
29711 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
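/* The three ptest builtins above share one pattern; the comparison
   code (EQ/LTU/GTU) selects whether the ZF result, the CF result, or
   the "neither flag set" result is returned.  */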
29713 /* SSE4.2 */
29714 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29715 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
29716 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
29717 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29718 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29720 /* SSE4A */
29721 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
29722 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
29723 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
29724 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29726 /* AES */
29727 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
29728 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
29730 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29731 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29732 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29733 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29735 /* PCLMUL */
29736 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
29738 /* AVX */
29739 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29740 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29741 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29742 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29743 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29744 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29745 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29746 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29747 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29748 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29749 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29750 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29751 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29752 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29753 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29754 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29755 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29756 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29757 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29758 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29759 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29760 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29761 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29762 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29763 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29764 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29766 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
29767 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
29768 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
29769 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
29771 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29772 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29773 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
29774 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
29775 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29776 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29777 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29778 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29779 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29780 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29781 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29782 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29783 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29784 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
29785 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
29786 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
29787 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
29788 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
29789 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
29790 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_fix_notruncv8sfv8si, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
29791 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
29792 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
29793 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
29794 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
29795 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29796 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29797 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
29798 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
29799 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
29800 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
29801 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
29802 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
29803 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
29804 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
29806 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29807 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29808 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
29810 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
29811 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29812 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29813 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29814 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29816 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29818 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
29819 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
29821 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
29822 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
29823 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
29824 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
29826 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
29827 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
29829 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
29830 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
29832 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
29833 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
29834 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
29835 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
29837 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
29838 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },
29840 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29841 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
29843 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29844 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29845 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29846 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29848 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
29849 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
29850 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
29851 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
29852 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
29853 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
29855 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
29856 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
29857 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
29858 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
29859 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
29860 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
29861 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
29862 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
29863 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
29864 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
29865 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
29866 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
29867 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
29868 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
29869 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
29871 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
29872 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
29874 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29875 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29877 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
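/* Each entry in this table pairs an ISA option mask with an insn
   pattern (CODE_FOR_*), the builtin's name, its IX86_BUILTIN_*
   enumerator, an rtx comparison/rounding code and a prototype tag
   (an ix86_builtin_func_type value cast to int).  The comparison
   field is UNKNOWN for plain expansion; for the vtestz/vtestc/vtestnzc
   and ptestz/ptestc/ptestnzc entries it roughly selects which flag is
   tested (EQ for the ZF-based "z" forms, LTU for the CF-based "c"
   forms, GTU for "nzc"), and for the floor/ceil/trunc/rint entries it
   carries the ROUND_* immediate passed to the rounding pattern.

   As a sketch only, user code normally reaches these builtins through
   the intrinsic wrappers rather than by calling them directly; e.g.,
   compiled with -mavx, the call below expands to
   __builtin_ia32_blendps256:

     #include <immintrin.h>

     __m256
     blend_even_odd (__m256 a, __m256 b)
     {
       return _mm256_blend_ps (a, b, 0xAA);
     }  */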
29879 /* AVX2 */
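/* In the rows below, prototype tags ending in _COUNT mark the shift
   builtins whose last operand is a shift count, given either as a
   scalar/immediate or in the low part of an XMM operand, rather than
   a full per-element vector; tags ending in _CONVERT mark entries such
   as palignr256 and the whole-register byte shifts, where the V4DI
   arguments are reinterpreted in V2TI mode before the pattern is used.
   A sketch of the two count forms as seen from user code, compiled
   with -mavx2 (the first call expands to __builtin_ia32_pslldi256,
   the second to __builtin_ia32_pslld256):

     #include <immintrin.h>

     __m256i
     shift_both_ways (__m256i v, __m128i amount)
     {
       v = _mm256_slli_epi32 (v, 3);
       return _mm256_sll_epi32 (v, amount);
     }  */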
29880 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
29881 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
29882 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
29883 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
29884 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
29885 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
29886 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
29887 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
29888 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29889 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29890 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29891 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29892 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29893 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29894 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29895 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29896 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
29897 { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29898 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29899 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29900 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29901 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
29902 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
29903 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29904 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29905 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29906 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29907 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29908 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29909 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29910 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29911 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29912 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29913 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29914 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29915 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29916 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29917 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
29918 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
29919 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29920 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29921 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3 , "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29922 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29923 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29924 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3 , "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29925 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29926 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29927 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3 , "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29928 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29929 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29930 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3 , "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29931 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
29932 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
29933 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2 , "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
29934 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2 , "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
29935 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2 , "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
29936 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2 , "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
29937 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2 , "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
29938 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
29939 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2 , "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
29940 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2 , "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
29941 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2 , "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
29942 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2 , "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
29943 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2 , "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
29944 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_even_v8si, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
29945 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmulhrswv16hi3 , "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29946 { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29947 { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29948 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29949 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256 , UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29950 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
29951 { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29952 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
29953 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29954 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
29955 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
29956 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
29957 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29958 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29959 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3 , "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29960 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
29961 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
29962 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
29963 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
29964 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
29965 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
29966 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
29967 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
29968 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
29969 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
29970 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
29971 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
29972 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
29973 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
29974 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
29975 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
29976 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
29977 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
29978 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29979 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29980 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29981 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29982 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29983 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29984 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29985 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29986 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29987 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29988 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29989 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29990 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29991 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29992 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29993 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29994 { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29995 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29996 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
29997 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
29998 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
29999 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
30000 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
30001 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
30002 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
30003 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
30004 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
30005 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
30006 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
30007 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
30008 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
30009 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30010 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
30011 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
30012 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
30013 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
30014 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_extracti128, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
30015 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_inserti128, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
30016 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
30017 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
30018 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30019 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30020 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30021 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30022 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
30023 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
30024 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30025 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
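/* LZCNT: the lone __builtin_clzs entry below is the 16-bit
   leading-zero count, guarded by OPTION_MASK_ISA_LZCNT.  */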
30027 { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
30029 /* BMI */
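/* The bextr builtins extract a bit field; the second operand packs the
   start bit in its low byte and the field length in the byte above it.
   A user-level sketch, compiled with -mbmi (the call expands to
   __builtin_ia32_bextr_u32), extracting the 4-bit field that starts at
   bit 8 of X:

     #include <x86intrin.h>

     unsigned int
     field (unsigned int x)
     {
       return __bextr_u32 (x, 8 | (4 << 8));
     }  */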
30030 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
30031 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
30032 { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
30034 /* TBM */
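/* bextri is the immediate-control form of the BMI bextr operation
   above; the immediate uses the same encoding (start bit in bits 0-7,
   field length in bits 8-15).  */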
30035 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
30036 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
30038 /* F16C */
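/* Half-precision conversions.  vcvtps2ph takes a rounding-control
   immediate and returns its packed results in an XMM register (hence
   the V8HI return type, with the 128-bit form using only the low
   half).  A user-level sketch, compiled with -mf16c (the calls expand
   to __builtin_ia32_vcvtps2ph256 and __builtin_ia32_vcvtph2ps256):

     #include <immintrin.h>

     __m256
     round_trip (__m256 v)
     {
       __m128i half = _mm256_cvtps_ph (v, _MM_FROUND_TO_NEAREST_INT);
       return _mm256_cvtph_ps (half);
     }  */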
30039 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
30040 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
30041 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
30042 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
30044 /* BMI2 */
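/* pdep scatters the low-order bits of its first operand to the set bit
   positions of the mask operand; pext is the inverse, gathering the
   bits selected by the mask down to the low end.  For example,
   _pdep_u32 (0xb, 0xf0) deposits the four low bits 1011 at bit
   positions 4-7 and yields 0xb0, and _pext_u32 (0xb0, 0xf0) recovers
   0xb.  A sketch, compiled with -mbmi2 (the calls expand to
   __builtin_ia32_pdep_si and __builtin_ia32_pext_si):

     #include <x86intrin.h>

     unsigned int
     scatter_and_gather (unsigned int bits, unsigned int mask)
     {
       unsigned int scattered = _pdep_u32 (bits, mask);
       return _pext_u32 (scattered, mask);
     }  */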
30045 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
30046 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
30047 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
30048 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
30049 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
30050 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
30052 /* AVX512F */
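/* Most AVX512F entries follow the masking convention: a builtin whose
   name ends in "_mask" takes, after its normal operands, a merge
   (write-mask passthrough) vector and an element mask whose prototype
   tag ends in _QI (__mmask8) or _HI (__mmask16); the "_maskz" variants
   zero the masked-off elements instead of merging.  So
   V16SI_FTYPE_V16SI_V16SI_V16SI_HI describes a builtin returning a
   V16SI and taking two V16SI sources, a V16SI merge vector and a
   16-bit mask.  A user-level sketch, compiled with -mavx512f (the call
   expands to __builtin_ia32_paddd512_mask):

     #include <immintrin.h>

     __m512i
     masked_add (__m512i a, __m512i b, __m512i orig, __mmask16 m)
     {
       return _mm512_mask_add_epi32 (orig, m, a, b);
     }  */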
30053 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_256si, "__builtin_ia32_si512_256si", IX86_BUILTIN_SI512_SI256, UNKNOWN, (int) V16SI_FTYPE_V8SI },
30054 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_256ps, "__builtin_ia32_ps512_256ps", IX86_BUILTIN_PS512_PS256, UNKNOWN, (int) V16SF_FTYPE_V8SF },
30055 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_256pd, "__builtin_ia32_pd512_256pd", IX86_BUILTIN_PD512_PD256, UNKNOWN, (int) V8DF_FTYPE_V4DF },
30056 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_si, "__builtin_ia32_si512_si", IX86_BUILTIN_SI512_SI, UNKNOWN, (int) V16SI_FTYPE_V4SI },
30057 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_ps, "__builtin_ia32_ps512_ps", IX86_BUILTIN_PS512_PS, UNKNOWN, (int) V16SF_FTYPE_V4SF },
30058 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_pd, "__builtin_ia32_pd512_pd", IX86_BUILTIN_PD512_PD, UNKNOWN, (int) V8DF_FTYPE_V2DF },
30059 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
30060 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
30061 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30062 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30063 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30064 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30065 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
30066 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_QI },
30067 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
30068 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_QI },
30069 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
30070 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
30071 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
30072 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
30073 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30074 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30075 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
30076 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtps2ph512_mask, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_HI },
30077 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv8siv8df2_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
30078 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT },
30079 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30080 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30081 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30082 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30083 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_QI },
30084 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_QI },
30085 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_QI },
30086 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_QI },
30087 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI },
30088 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI },
30089 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI },
30090 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI },
30091 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30092 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30093 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30094 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30095 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30096 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30097 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30098 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30099 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30100 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30101 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30102 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30103 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30104 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30105 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30106 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
30107 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_HI },
30108 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_QI },
30109 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_HI },
30110 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
30111 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
30112 { OPTION_MASK_ISA_AVX512F & ~OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_memv8di_mask, "__builtin_ia32_pbroadcastq512_mem_mask", IX86_BUILTIN_PBROADCASTQ512_MEM, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
30113 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30114 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30115 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30116 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30117 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30118 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30119 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30120 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30121 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30122 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30123 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30124 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30125 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30126 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30127 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30128 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30129 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30130 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30131 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
30132 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
30133 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
30134 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
30135 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
30136 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
30137 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
30138 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
30139 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
30140 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
30141 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
30142 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
30143 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
30144 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, "__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
30145 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
30146 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
30147 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
30148 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
30149 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
30150 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
30151 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
30152 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
30153 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
30154 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
30155 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
30156 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
30157 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask" , IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30158 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
30159 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30160 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30161 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30162 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30163 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30164 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30165 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30166 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30167 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30168 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30169 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30170 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
30171 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30172 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
30173 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30174 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30175 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30176 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
30177 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30178 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
30179 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30180 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30181 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30182 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
30183 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30184 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
30185 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30186 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30187 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30188 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30189 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30190 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30191 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30192 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30193 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30194 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30195 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30196 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30197 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30198 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30199 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30200 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30201 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30202 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v2df, "__builtin_ia32_rcp14sd", IX86_BUILTIN_RCP14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30203 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v4sf, "__builtin_ia32_rcp14ss", IX86_BUILTIN_RCP14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30204 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30205 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30206 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v2df, "__builtin_ia32_rsqrt14sd", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30207 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v4sf, "__builtin_ia32_rsqrt14ss", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30208 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
30209 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
30210 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
30211 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
30212 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
30213 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
30214 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
30215 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
30216 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
30217 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
30218 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
30219 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
30220 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30221 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30222 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30223 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30224 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
30225 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30226 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30227 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30228 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30229 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30230 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
30231 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
30232 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30233 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30234 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30235 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30236 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
30237 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
30238 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
30239 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
30240 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30241 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30242 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30243 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30244 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30245 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30246 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
30247 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
30248 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
30249 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
30251 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF },
30252 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF },
30253 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF },
30254 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF },
30255 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf, "__builtin_ia32_exp2ps", IX86_BUILTIN_EXP2PS, UNKNOWN, (int) V16SF_FTYPE_V16SF },
30256 { OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF },
30257 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
30258 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
30260 /* Mask arithmetic operations */
30261 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andhi3, "__builtin_ia32_kandhi", IX86_BUILTIN_KAND16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30262 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kandnhi, "__builtin_ia32_kandnhi", IX86_BUILTIN_KANDN16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30263 { OPTION_MASK_ISA_AVX512F, CODE_FOR_one_cmplhi2, "__builtin_ia32_knothi", IX86_BUILTIN_KNOT16, UNKNOWN, (int) HI_FTYPE_HI },
30264 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorhi3, "__builtin_ia32_korhi", IX86_BUILTIN_KOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30265 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestchi, "__builtin_ia32_kortestchi", IX86_BUILTIN_KORTESTC16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30266 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestzhi, "__builtin_ia32_kortestzhi", IX86_BUILTIN_KORTESTZ16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30267 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kunpckhi, "__builtin_ia32_kunpckhi", IX86_BUILTIN_KUNPCKBW, UNKNOWN, (int) HI_FTYPE_HI_HI },
30268 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kxnorhi, "__builtin_ia32_kxnorhi", IX86_BUILTIN_KXNOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30269 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorhi3, "__builtin_ia32_kxorhi", IX86_BUILTIN_KXOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30270 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kmovw, "__builtin_ia32_kmov16", IX86_BUILTIN_KMOV16, UNKNOWN, (int) HI_FTYPE_HI },
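/* The mask (k-register) builtins above all work on __mmask16 values
   (HI_FTYPE_HI / HI_FTYPE_HI_HI).  The user-visible wrappers live in
   avx512fintrin.h; roughly, assuming the usual intrinsic spellings:

     __mmask16 m = _mm512_kand (a, b);   maps to __builtin_ia32_kandhi (a, b)
     __mmask16 n = _mm512_knot (m);      maps to __builtin_ia32_knothi (m)

   kortestchi / kortestzhi test the OR of two masks, and kunpckhi glues
   two 8-bit mask halves into one 16-bit mask.  */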
30272 /* SHA */
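/* These map 1:1 onto the wrappers in shaintrin.h (_mm_sha1msg1_epu32,
   _mm_sha1rnds4_epu32, _mm_sha256rnds2_epu32, and so on).  sha1rnds4
   takes an immediate selecting the SHA1 round function, hence the extra
   _INT in its signature; sha256rnds2 takes a third vector operand.  */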
30273 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg1, 0, IX86_BUILTIN_SHA1MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30274 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg2, 0, IX86_BUILTIN_SHA1MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30275 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1nexte, 0, IX86_BUILTIN_SHA1NEXTE, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30276 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1rnds4, 0, IX86_BUILTIN_SHA1RNDS4, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
30277 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg1, 0, IX86_BUILTIN_SHA256MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30278 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg2, 0, IX86_BUILTIN_SHA256MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30279 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256rnds2, 0, IX86_BUILTIN_SHA256RNDS2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
30280 };
30282 /* Builtins with rounding support. */
30283 static const struct builtin_description bdesc_round_args[] =
30284 {
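/* Every signature in this table ends in _INT: the final argument is the
   embedded rounding / SAE immediate, and each entry expands through a
   *_round insn pattern.  A minimal sketch of how a wrapper reaches one
   of these builtins (the real wrappers are in avx512fintrin.h; the
   function name here is only illustrative):

     __m512d
     add_round_pd (__m512d a, __m512d b)
     {
       return (__m512d) __builtin_ia32_addpd512_mask
         ((__v8df) a, (__v8df) b, (__v8df) a, (__mmask8) -1,
          _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
     }

   The third operand is the merge source and the -1 mask selects all
   lanes, so the result is simply the rounded sum.  */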
30285 /* AVX512F */
30286 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30287 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30288 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmaddv2df3_round, "__builtin_ia32_addsd_round", IX86_BUILTIN_ADDSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30289 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmaddv4sf3_round, "__builtin_ia32_addss_round", IX86_BUILTIN_ADDSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30290 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) QI_FTYPE_V8DF_V8DF_INT_QI_INT },
30291 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) HI_FTYPE_V16SF_V16SF_INT_HI_INT },
30292 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv2df3_mask_round, "__builtin_ia32_cmpsd_mask", IX86_BUILTIN_CMPSD_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI_INT },
30293 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv4sf3_mask_round, "__builtin_ia32_cmpss_mask", IX86_BUILTIN_CMPSS_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI_INT },
30294 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_comi_round, "__builtin_ia32_vcomisd", IX86_BUILTIN_COMIDF, UNKNOWN, (int) INT_FTYPE_V2DF_V2DF_INT_INT },
30295 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_comi_round, "__builtin_ia32_vcomiss", IX86_BUILTIN_COMISF, UNKNOWN, (int) INT_FTYPE_V4SF_V4SF_INT_INT },
30296 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
30297 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30298 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT },
30299 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv8dfv8si_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30300 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT },
30301 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30302 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT },
30303 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30304 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT },
30305 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT },
30306 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_cvtsi2ss32", IX86_BUILTIN_CVTSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_INT },
30307 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT },
30308 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT },
30309 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30310 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30311 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30312 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30313 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
30314 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT },
30315 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT },
30316 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT },
30317 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30318 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30319 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmdivv2df3_round, "__builtin_ia32_divsd_round", IX86_BUILTIN_DIVSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30320 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmdivv4sf3_round, "__builtin_ia32_divss_round", IX86_BUILTIN_DIVSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30321 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
30322 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
30323 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
30324 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
30325 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_mask_round, "__builtin_ia32_fixupimmsd_mask", IX86_BUILTIN_FIXUPIMMSD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
30326 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_maskz_round, "__builtin_ia32_fixupimmsd_maskz", IX86_BUILTIN_FIXUPIMMSD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
30327 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_mask_round, "__builtin_ia32_fixupimmss_mask", IX86_BUILTIN_FIXUPIMMSS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
30328 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_maskz_round, "__builtin_ia32_fixupimmss_maskz", IX86_BUILTIN_FIXUPIMMSS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
30329 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30330 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30331 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv2df_round, "__builtin_ia32_getexpsd128_round", IX86_BUILTIN_GETEXPSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30332 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv4sf_round, "__builtin_ia32_getexpss128_round", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30333 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
30334 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
30335 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
30336 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
30337 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30338 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30339 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30340 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsmaxv4sf3_round, "__builtin_ia32_maxss_round", IX86_BUILTIN_MAXSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30341 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30342 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30343 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsminv2df3_round, "__builtin_ia32_minsd_round", IX86_BUILTIN_MINSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30344 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsminv4sf3_round, "__builtin_ia32_minss_round", IX86_BUILTIN_MINSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30345 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30346 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30347 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmmulv2df3_round, "__builtin_ia32_mulsd_round", IX86_BUILTIN_MULSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30348 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmmulv4sf3_round, "__builtin_ia32_mulss_round", IX86_BUILTIN_MULSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30349 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
30350 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
30351 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev2df_round, "__builtin_ia32_rndscalesd_round", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
30352 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev4sf_round, "__builtin_ia32_rndscaless_round", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
30353 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30354 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30355 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv2df_round, "__builtin_ia32_scalefsd_round", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30356 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv4sf_round, "__builtin_ia32_scalefss_round", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30357 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30358 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30359 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsqrtv2df2_round, "__builtin_ia32_sqrtsd_round", IX86_BUILTIN_SQRTSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30360 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsqrtv4sf2_round, "__builtin_ia32_sqrtss_round", IX86_BUILTIN_SQRTSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30361 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30362 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30363 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsubv2df3_round, "__builtin_ia32_subsd_round", IX86_BUILTIN_SUBSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30364 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsubv4sf3_round, "__builtin_ia32_subss_round", IX86_BUILTIN_SUBSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30365 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2si_round, "__builtin_ia32_vcvtsd2si32", IX86_BUILTIN_VCVTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
30366 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq_round, "__builtin_ia32_vcvtsd2si64", IX86_BUILTIN_VCVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
30367 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtsd2usi_round, "__builtin_ia32_vcvtsd2usi32", IX86_BUILTIN_VCVTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
30368 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtsd2usiq_round, "__builtin_ia32_vcvtsd2usi64", IX86_BUILTIN_VCVTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
30369 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtss2si_round, "__builtin_ia32_vcvtss2si32", IX86_BUILTIN_VCVTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
30370 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq_round, "__builtin_ia32_vcvtss2si64", IX86_BUILTIN_VCVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
30371 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtss2usi_round, "__builtin_ia32_vcvtss2usi32", IX86_BUILTIN_VCVTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
30372 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtss2usiq_round, "__builtin_ia32_vcvtss2usi64", IX86_BUILTIN_VCVTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
30373 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvttsd2si_round, "__builtin_ia32_vcvttsd2si32", IX86_BUILTIN_VCVTTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
30374 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq_round, "__builtin_ia32_vcvttsd2si64", IX86_BUILTIN_VCVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
30375 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttsd2usi_round, "__builtin_ia32_vcvttsd2usi32", IX86_BUILTIN_VCVTTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
30376 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttsd2usiq_round, "__builtin_ia32_vcvttsd2usi64", IX86_BUILTIN_VCVTTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
30377 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvttss2si_round, "__builtin_ia32_vcvttss2si32", IX86_BUILTIN_VCVTTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
30378 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq_round, "__builtin_ia32_vcvttss2si64", IX86_BUILTIN_VCVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
30379 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttss2usi_round, "__builtin_ia32_vcvttss2usi32", IX86_BUILTIN_VCVTTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
30380 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttss2usiq_round, "__builtin_ia32_vcvttss2usi64", IX86_BUILTIN_VCVTTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
30381 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30382 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30383 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30384 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30385 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30386 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30387 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v2df_round, "__builtin_ia32_vfmaddsd3_round", IX86_BUILTIN_VFMADDSD3_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_INT },
30388 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v4sf_round, "__builtin_ia32_vfmaddss3_round", IX86_BUILTIN_VFMADDSS3_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT },
30389 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30390 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30391 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30392 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30393 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30394 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30395 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30396 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30397 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30398 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30399 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30400 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30401 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30402 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30403 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30404 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30406 /* AVX512ER */
30407 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v8df_mask_round, "__builtin_ia32_exp2pd_mask", IX86_BUILTIN_EXP2PD_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30408 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf_mask_round, "__builtin_ia32_exp2ps_mask", IX86_BUILTIN_EXP2PS_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30409 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v8df_mask_round, "__builtin_ia32_rcp28pd_mask", IX86_BUILTIN_RCP28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30410 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v16sf_mask_round, "__builtin_ia32_rcp28ps_mask", IX86_BUILTIN_RCP28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30411 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v2df_round, "__builtin_ia32_rcp28sd_round", IX86_BUILTIN_RCP28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30412 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v4sf_round, "__builtin_ia32_rcp28ss_round", IX86_BUILTIN_RCP28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30413 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v8df_mask_round, "__builtin_ia32_rsqrt28pd_mask", IX86_BUILTIN_RSQRT28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30414 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v16sf_mask_round, "__builtin_ia32_rsqrt28ps_mask", IX86_BUILTIN_RSQRT28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30415 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v2df_round, "__builtin_ia32_rsqrt28sd_round", IX86_BUILTIN_RSQRT28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30416 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
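/* AVX512ER: the reduced-error approximation instructions (exp2 with
   2^-23 accuracy, rcp28 / rsqrt28 with 2^-28).  As with the rest of this
   table, each signature ends in the rounding/SAE immediate; the wrappers
   are in avx512erintrin.h.  */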
30417 };
30419 /* FMA4 and XOP. */
30420 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
30421 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
30422 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
30423 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
30424 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
30425 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
30426 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
30427 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
30428 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
30429 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
30430 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
30431 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
30432 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
30433 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
30434 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
30435 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
30436 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
30437 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
30438 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
30439 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
30440 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
30441 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
30442 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
30443 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
30444 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
30445 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
30446 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
30447 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
30448 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
30449 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
30450 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
30451 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
30452 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
30453 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
30454 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
30455 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
30456 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
30457 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
30458 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
30459 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
30460 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
30461 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
30462 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
30463 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
30464 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
30465 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
30466 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
30467 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
30468 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
30469 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
30470 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
30471 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
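/* The MULTI_ARG_* names above are local shorthands for
   ix86_builtin_func_type codes, so the table below stays readable: the
   suffix encodes the operand shape (3_SF = three V4SF operands, 2_DI_IMM
   = V2DI plus an immediate count, 2_QI_CMP = a V16QI comparison whose
   condition comes from the entry's rtx_code field, and so on).  */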
30473 static const struct builtin_description bdesc_multi_arg[] =
30474 {
30475 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
30476 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
30477 UNKNOWN, (int)MULTI_ARG_3_SF },
30478 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
30479 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
30480 UNKNOWN, (int)MULTI_ARG_3_DF },
30482 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
30483 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
30484 UNKNOWN, (int)MULTI_ARG_3_SF },
30485 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
30486 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
30487 UNKNOWN, (int)MULTI_ARG_3_DF },
30489 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
30490 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
30491 UNKNOWN, (int)MULTI_ARG_3_SF },
30492 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
30493 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
30494 UNKNOWN, (int)MULTI_ARG_3_DF },
30495 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
30496 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
30497 UNKNOWN, (int)MULTI_ARG_3_SF2 },
30498 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
30499 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
30500 UNKNOWN, (int)MULTI_ARG_3_DF2 },
30502 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
30503 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
30504 UNKNOWN, (int)MULTI_ARG_3_SF },
30505 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
30506 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
30507 UNKNOWN, (int)MULTI_ARG_3_DF },
30508 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
30509 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
30510 UNKNOWN, (int)MULTI_ARG_3_SF2 },
30511 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
30512 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
30513 UNKNOWN, (int)MULTI_ARG_3_DF2 },
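/* The entries above are gated on OPTION_MASK_ISA_FMA
   | OPTION_MASK_ISA_FMA4, the intent being that the builtin is made
   available when either ISA is enabled, since both provide the
   underlying fma patterns.  */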
30515 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
30516 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
30517 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
30518 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
30519 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi",IX86_BUILTIN_VPCMOV_V16QI,UNKNOWN, (int)MULTI_ARG_3_QI },
30520 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
30521 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
30523 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
30524 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
30525 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
30526 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
30527 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
30528 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
30529 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
30531 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
30533 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
30534 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
30535 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30536 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30537 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
30538 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
30539 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30540 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30541 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30542 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30543 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30544 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30546 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30547 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
30548 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
30549 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
30550 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
30551 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
30552 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
30553 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
30554 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30555 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
30556 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
30557 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
30558 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30559 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
30560 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
30561 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
30563 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_1_SF },
30564 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_1_DF },
30565 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
30566 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
30567 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
30568 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
30570 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30571 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
30572 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
30573 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30574 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
30575 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30576 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30577 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
30578 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
30579 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30580 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
30581 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30582 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30583 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30584 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30586 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
30587 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
30588 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
30589 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
30590 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
30591 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
30592 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
30594 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
30595 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
30596 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
30597 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
30598 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
30599 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
30600 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
30602 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
30603 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
30604 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
30605 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
30606 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
30607 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
30608 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
30610 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
30611 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
30612 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
30613 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
30614 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
30615 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
30616 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
30618 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
30619 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
30620 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
30621 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
30622 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
30623 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
30624 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
30626 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
30627 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
30628 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
30629 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
30630 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
30631 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
30632 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
30634 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
30635 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
30636 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
30637 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
30638 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
30639 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
30640 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
30642 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
30643 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
30644 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
30645 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
30646 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
30647 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
30648 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
30650 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
30651 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
30652 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
30653 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
30654 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
30655 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
30656 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
30657 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
30659 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
30660 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
30661 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
30662 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
30663 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
30664 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
30665 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
30666 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
30668 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
30669 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
30670 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
30671 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
30675 /* TM vector builtins. */
30677 /* Reuse the existing x86-specific `struct builtin_description' because
30678 we're lazy. Add casts to make them fit. */
30679 static const struct builtin_description bdesc_tm[] =
30681 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30682 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30683 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30684 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30685 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30686 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30687 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30689 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30690 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30691 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30692 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30693 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30694 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30695 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30697 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30698 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30699 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30700 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30701 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30702 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30703 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30705 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
30706 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
30707 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
30710 /* TM callbacks. */
30712 /* Return the builtin decl needed to load a vector of TYPE. */
30714 static tree
30715 ix86_builtin_tm_load (tree type)
30717 if (TREE_CODE (type) == VECTOR_TYPE)
30719 switch (tree_to_uhwi (TYPE_SIZE (type)))
30721 case 64:
30722 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
30723 case 128:
30724 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
30725 case 256:
30726 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
30729 return NULL_TREE;
30732 /* Return the builtin decl needed to store a vector of TYPE. */
30734 static tree
30735 ix86_builtin_tm_store (tree type)
30737 if (TREE_CODE (type) == VECTOR_TYPE)
30739 switch (tree_to_uhwi (TYPE_SIZE (type)))
30741 case 64:
30742 return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
30743 case 128:
30744 return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
30745 case 256:
30746 return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
30749 return NULL_TREE;
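/* Illustrative example of how the two hooks above resolve (assuming a
   16-byte vector type, e.g. a V4SF whose TYPE_SIZE is 128 bits; the type
   name is only a placeholder):

     ix86_builtin_tm_load (v4sf_type)   -> decl for BUILT_IN_TM_LOAD_M128
     ix86_builtin_tm_store (v4sf_type)  -> decl for BUILT_IN_TM_STORE_M128

   i.e. the decls registered for "__builtin__ITM_RM128" and
   "__builtin__ITM_WM128" in bdesc_tm above.  */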
30752 /* Initialize the transactional memory vector load/store builtins. */
30754 static void
30755 ix86_init_tm_builtins (void)
30757 enum ix86_builtin_func_type ftype;
30758 const struct builtin_description *d;
30759 size_t i;
30760 tree decl;
30761 tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
30762 tree attrs_log, attrs_type_log;
30764 if (!flag_tm)
30765 return;
30767 /* If there are no builtins defined, we must be compiling in a
30768 language without trans-mem support. */
30769 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
30770 return;
30772 /* Use whatever attributes a normal TM load has. */
30773 decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
30774 attrs_load = DECL_ATTRIBUTES (decl);
30775 attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30776 /* Use whatever attributes a normal TM store has. */
30777 decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
30778 attrs_store = DECL_ATTRIBUTES (decl);
30779 attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30780 /* Use whatever attributes a normal TM log has. */
30781 decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
30782 attrs_log = DECL_ATTRIBUTES (decl);
30783 attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30785 for (i = 0, d = bdesc_tm;
30786 i < ARRAY_SIZE (bdesc_tm);
30787 i++, d++)
30789 if ((d->mask & ix86_isa_flags) != 0
30790 || (lang_hooks.builtin_function
30791 == lang_hooks.builtin_function_ext_scope))
30793 tree type, attrs, attrs_type;
30794 enum built_in_function code = (enum built_in_function) d->code;
30796 ftype = (enum ix86_builtin_func_type) d->flag;
30797 type = ix86_get_builtin_func_type (ftype);
30799 if (BUILTIN_TM_LOAD_P (code))
30801 attrs = attrs_load;
30802 attrs_type = attrs_type_load;
30804 else if (BUILTIN_TM_STORE_P (code))
30806 attrs = attrs_store;
30807 attrs_type = attrs_type_store;
30809 else
30811 attrs = attrs_log;
30812 attrs_type = attrs_type_log;
30814 decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
30815 /* The builtin without the prefix for
30816 calling it directly. */
30817 d->name + strlen ("__builtin_"),
30818 attrs);
30819 /* add_builtin_function() will set the DECL_ATTRIBUTES, now
30820 set the TYPE_ATTRIBUTES. */
30821 decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
30823 set_builtin_decl (code, decl, false);
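/* For illustration: for a bdesc_tm entry such as "__builtin__ITM_WM128",
   the add_builtin_function call above registers the builtin under that
   name and gives it "_ITM_WM128" (the name with the "__builtin_" prefix
   stripped) as its library name, so the underlying libitm routine is what
   ultimately gets called.  */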
30828 /* Set up all the MMX/SSE builtins, even builtins for instructions that are
30829 not in the current target ISA, so that the user can compile particular
30830 modules with target-specific options that differ from the command-line
30831 options. */
30832 static void
30833 ix86_init_mmx_sse_builtins (void)
30835 const struct builtin_description * d;
30836 enum ix86_builtin_func_type ftype;
30837 size_t i;
30839 /* Add all special builtins with variable number of operands. */
30840 for (i = 0, d = bdesc_special_args;
30841 i < ARRAY_SIZE (bdesc_special_args);
30842 i++, d++)
30844 if (d->name == 0)
30845 continue;
30847 ftype = (enum ix86_builtin_func_type) d->flag;
30848 def_builtin (d->mask, d->name, ftype, d->code);
30851 /* Add all builtins with variable number of operands. */
30852 for (i = 0, d = bdesc_args;
30853 i < ARRAY_SIZE (bdesc_args);
30854 i++, d++)
30856 if (d->name == 0)
30857 continue;
30859 ftype = (enum ix86_builtin_func_type) d->flag;
30860 def_builtin_const (d->mask, d->name, ftype, d->code);
30863 /* Add all builtins with rounding. */
30864 for (i = 0, d = bdesc_round_args;
30865 i < ARRAY_SIZE (bdesc_round_args);
30866 i++, d++)
30868 if (d->name == 0)
30869 continue;
30871 ftype = (enum ix86_builtin_func_type) d->flag;
30872 def_builtin_const (d->mask, d->name, ftype, d->code);
30875 /* pcmpestr[im] insns. */
30876 for (i = 0, d = bdesc_pcmpestr;
30877 i < ARRAY_SIZE (bdesc_pcmpestr);
30878 i++, d++)
30880 if (d->code == IX86_BUILTIN_PCMPESTRM128)
30881 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
30882 else
30883 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
30884 def_builtin_const (d->mask, d->name, ftype, d->code);
30887 /* pcmpistr[im] insns. */
30888 for (i = 0, d = bdesc_pcmpistr;
30889 i < ARRAY_SIZE (bdesc_pcmpistr);
30890 i++, d++)
30892 if (d->code == IX86_BUILTIN_PCMPISTRM128)
30893 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
30894 else
30895 ftype = INT_FTYPE_V16QI_V16QI_INT;
30896 def_builtin_const (d->mask, d->name, ftype, d->code);
30899 /* comi/ucomi insns. */
30900 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
30902 if (d->mask == OPTION_MASK_ISA_SSE2)
30903 ftype = INT_FTYPE_V2DF_V2DF;
30904 else
30905 ftype = INT_FTYPE_V4SF_V4SF;
30906 def_builtin_const (d->mask, d->name, ftype, d->code);
30909 /* SSE */
30910 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
30911 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
30912 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
30913 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
30915 /* SSE or 3DNow!A */
30916 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
30917 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
30918 IX86_BUILTIN_MASKMOVQ);
30920 /* SSE2 */
30921 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
30922 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
30924 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
30925 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
30926 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
30927 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
30929 /* SSE3. */
30930 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
30931 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
30932 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
30933 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
30935 /* AES */
30936 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
30937 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
30938 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
30939 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
30940 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
30941 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
30942 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
30943 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
30944 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
30945 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
30946 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
30947 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
30949 /* PCLMUL */
30950 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
30951 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
30953 /* RDRND */
30954 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
30955 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
30956 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
30957 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
30958 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
30959 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
30960 IX86_BUILTIN_RDRAND64_STEP);
30962 /* AVX2 */
30963 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
30964 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
30965 IX86_BUILTIN_GATHERSIV2DF);
30967 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
30968 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
30969 IX86_BUILTIN_GATHERSIV4DF);
30971 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
30972 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
30973 IX86_BUILTIN_GATHERDIV2DF);
30975 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
30976 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
30977 IX86_BUILTIN_GATHERDIV4DF);
30979 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
30980 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
30981 IX86_BUILTIN_GATHERSIV4SF);
30983 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
30984 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
30985 IX86_BUILTIN_GATHERSIV8SF);
30987 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
30988 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
30989 IX86_BUILTIN_GATHERDIV4SF);
30991 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
30992 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
30993 IX86_BUILTIN_GATHERDIV8SF);
30995 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
30996 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
30997 IX86_BUILTIN_GATHERSIV2DI);
30999 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
31000 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
31001 IX86_BUILTIN_GATHERSIV4DI);
31003 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
31004 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
31005 IX86_BUILTIN_GATHERDIV2DI);
31007 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
31008 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
31009 IX86_BUILTIN_GATHERDIV4DI);
31011 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
31012 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
31013 IX86_BUILTIN_GATHERSIV4SI);
31015 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
31016 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
31017 IX86_BUILTIN_GATHERSIV8SI);
31019 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
31020 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
31021 IX86_BUILTIN_GATHERDIV4SI);
31023 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
31024 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
31025 IX86_BUILTIN_GATHERDIV8SI);
31027 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
31028 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
31029 IX86_BUILTIN_GATHERALTSIV4DF);
31031 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
31032 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
31033 IX86_BUILTIN_GATHERALTDIV8SF);
31035 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
31036 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
31037 IX86_BUILTIN_GATHERALTSIV4DI);
31039 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
31040 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
31041 IX86_BUILTIN_GATHERALTDIV8SI);
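/* For reference, the AVX2 gather builtins above all follow the operand
   order suggested by their function types:
   (merge source, base pointer, index vector, mask, scale).  E.g.

     V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT
       => v2df gather (v2df src, const double *base, v4si index,
                       v2df mask, int scale)

   which matches the shape of the _mm_mask_i32gather_pd-style intrinsics.  */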
31043 /* AVX512F */
31044 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf",
31045 V16SF_FTYPE_V16SF_PCFLOAT_V16SI_HI_INT,
31046 IX86_BUILTIN_GATHER3SIV16SF);
31048 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df",
31049 V8DF_FTYPE_V8DF_PCDOUBLE_V8SI_QI_INT,
31050 IX86_BUILTIN_GATHER3SIV8DF);
31052 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf",
31053 V8SF_FTYPE_V8SF_PCFLOAT_V8DI_QI_INT,
31054 IX86_BUILTIN_GATHER3DIV16SF);
31056 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df",
31057 V8DF_FTYPE_V8DF_PCDOUBLE_V8DI_QI_INT,
31058 IX86_BUILTIN_GATHER3DIV8DF);
31060 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si",
31061 V16SI_FTYPE_V16SI_PCINT_V16SI_HI_INT,
31062 IX86_BUILTIN_GATHER3SIV16SI);
31064 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di",
31065 V8DI_FTYPE_V8DI_PCINT64_V8SI_QI_INT,
31066 IX86_BUILTIN_GATHER3SIV8DI);
31068 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si",
31069 V8SI_FTYPE_V8SI_PCINT_V8DI_QI_INT,
31070 IX86_BUILTIN_GATHER3DIV16SI);
31072 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di",
31073 V8DI_FTYPE_V8DI_PCINT64_V8DI_QI_INT,
31074 IX86_BUILTIN_GATHER3DIV8DI);
31076 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df ",
31077 V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
31078 IX86_BUILTIN_GATHER3ALTSIV8DF);
31080 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf ",
31081 V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
31082 IX86_BUILTIN_GATHER3ALTDIV16SF);
31084 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di ",
31085 V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
31086 IX86_BUILTIN_GATHER3ALTSIV8DI);
31088 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si ",
31089 V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
31090 IX86_BUILTIN_GATHER3ALTDIV16SI);
31092 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf",
31093 VOID_FTYPE_PFLOAT_HI_V16SI_V16SF_INT,
31094 IX86_BUILTIN_SCATTERSIV16SF);
31096 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df",
31097 VOID_FTYPE_PDOUBLE_QI_V8SI_V8DF_INT,
31098 IX86_BUILTIN_SCATTERSIV8DF);
31100 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf",
31101 VOID_FTYPE_PFLOAT_QI_V8DI_V8SF_INT,
31102 IX86_BUILTIN_SCATTERDIV16SF);
31104 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df",
31105 VOID_FTYPE_PDOUBLE_QI_V8DI_V8DF_INT,
31106 IX86_BUILTIN_SCATTERDIV8DF);
31108 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si",
31109 VOID_FTYPE_PINT_HI_V16SI_V16SI_INT,
31110 IX86_BUILTIN_SCATTERSIV16SI);
31112 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di",
31113 VOID_FTYPE_PLONGLONG_QI_V8SI_V8DI_INT,
31114 IX86_BUILTIN_SCATTERSIV8DI);
31116 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si",
31117 VOID_FTYPE_PINT_QI_V8DI_V8SI_INT,
31118 IX86_BUILTIN_SCATTERDIV16SI);
31120 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8di",
31121 VOID_FTYPE_PLONGLONG_QI_V8DI_V8DI_INT,
31122 IX86_BUILTIN_SCATTERDIV8DI);
31124 /* AVX512PF */
31125 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
31126 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
31127 IX86_BUILTIN_GATHERPFDPD);
31128 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
31129 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
31130 IX86_BUILTIN_GATHERPFDPS);
31131 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqpd",
31132 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
31133 IX86_BUILTIN_GATHERPFQPD);
31134 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
31135 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
31136 IX86_BUILTIN_GATHERPFQPS);
31137 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdpd",
31138 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
31139 IX86_BUILTIN_SCATTERPFDPD);
31140 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
31141 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
31142 IX86_BUILTIN_SCATTERPFDPS);
31143 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqpd",
31144 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
31145 IX86_BUILTIN_SCATTERPFQPD);
31146 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
31147 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
31148 IX86_BUILTIN_SCATTERPFQPS);
31150 /* SHA */
31151 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1",
31152 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
31153 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2",
31154 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
31155 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1nexte",
31156 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE);
31157 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1rnds4",
31158 V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4);
31159 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg1",
31160 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1);
31161 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg2",
31162 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2);
31163 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256rnds2",
31164 V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2);
31166 /* RTM. */
31167 def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
31168 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
31170 /* MMX access to the vec_init patterns. */
31171 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
31172 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
31174 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
31175 V4HI_FTYPE_HI_HI_HI_HI,
31176 IX86_BUILTIN_VEC_INIT_V4HI);
31178 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
31179 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
31180 IX86_BUILTIN_VEC_INIT_V8QI);
31182 /* Access to the vec_extract patterns. */
31183 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
31184 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
31185 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
31186 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
31187 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
31188 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
31189 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
31190 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
31191 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
31192 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
31194 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
31195 "__builtin_ia32_vec_ext_v4hi",
31196 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
31198 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
31199 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
31201 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
31202 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
31204 /* Access to the vec_set patterns. */
31205 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
31206 "__builtin_ia32_vec_set_v2di",
31207 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
31209 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
31210 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
31212 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
31213 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
31215 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
31216 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
31218 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
31219 "__builtin_ia32_vec_set_v4hi",
31220 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
31222 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
31223 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
31225 /* RDSEED */
31226 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
31227 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
31228 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
31229 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
31230 def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
31231 "__builtin_ia32_rdseed_di_step",
31232 INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
31234 /* ADCX */
31235 def_builtin (0, "__builtin_ia32_addcarryx_u32",
31236 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
31237 def_builtin (OPTION_MASK_ISA_64BIT,
31238 "__builtin_ia32_addcarryx_u64",
31239 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
31240 IX86_BUILTIN_ADDCARRYX64);
31242 /* Read/write FLAGS. */
31243 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u32",
31244 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
31245 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u64",
31246 UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
31247 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u32",
31248 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS);
31249 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u64",
31250 VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS);
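/* A note on the ISA masks used just above (a reading of def_builtin's
   conventions, not a new rule): a mask of 0, as for
   __builtin_ia32_addcarryx_u32, makes the builtin available on every
   target; OPTION_MASK_ISA_64BIT restricts it to 64-bit targets; and the
   ~OPTION_MASK_ISA_64BIT form on the *_u32 EFLAGS variants marks them as
   the 32-bit counterparts of the *_u64 versions.  */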
31252 /* CLFLUSHOPT. */
31253 def_builtin (OPTION_MASK_ISA_CLFLUSHOPT, "__builtin_ia32_clflushopt",
31254 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSHOPT);
31256 /* Add FMA4 multi-arg argument instructions */
31257 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
31259 if (d->name == 0)
31260 continue;
31262 ftype = (enum ix86_builtin_func_type) d->flag;
31263 def_builtin_const (d->mask, d->name, ftype, d->code);
31267 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
31268 to return a pointer to VERSION_DECL if the outcome of the expression
31269 formed by PREDICATE_CHAIN is true. This function will be called during
31270 version dispatch to decide which function version to execute. It returns
31271 the basic block at the end, to which more conditions can be added. */
31273 static basic_block
31274 add_condition_to_bb (tree function_decl, tree version_decl,
31275 tree predicate_chain, basic_block new_bb)
31277 gimple return_stmt;
31278 tree convert_expr, result_var;
31279 gimple convert_stmt;
31280 gimple call_cond_stmt;
31281 gimple if_else_stmt;
31283 basic_block bb1, bb2, bb3;
31284 edge e12, e23;
31286 tree cond_var, and_expr_var = NULL_TREE;
31287 gimple_seq gseq;
31289 tree predicate_decl, predicate_arg;
31291 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
31293 gcc_assert (new_bb != NULL);
31294 gseq = bb_seq (new_bb);
31297 convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
31298 build_fold_addr_expr (version_decl));
31299 result_var = create_tmp_var (ptr_type_node, NULL);
31300 convert_stmt = gimple_build_assign (result_var, convert_expr);
31301 return_stmt = gimple_build_return (result_var);
31303 if (predicate_chain == NULL_TREE)
31305 gimple_seq_add_stmt (&gseq, convert_stmt);
31306 gimple_seq_add_stmt (&gseq, return_stmt);
31307 set_bb_seq (new_bb, gseq);
31308 gimple_set_bb (convert_stmt, new_bb);
31309 gimple_set_bb (return_stmt, new_bb);
31310 pop_cfun ();
31311 return new_bb;
31314 while (predicate_chain != NULL)
31316 cond_var = create_tmp_var (integer_type_node, NULL);
31317 predicate_decl = TREE_PURPOSE (predicate_chain);
31318 predicate_arg = TREE_VALUE (predicate_chain);
31319 call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
31320 gimple_call_set_lhs (call_cond_stmt, cond_var);
31322 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
31323 gimple_set_bb (call_cond_stmt, new_bb);
31324 gimple_seq_add_stmt (&gseq, call_cond_stmt);
31326 predicate_chain = TREE_CHAIN (predicate_chain);
31328 if (and_expr_var == NULL)
31329 and_expr_var = cond_var;
31330 else
31332 gimple assign_stmt;
31333 /* Use MIN_EXPR to check whether any integer is zero:
31334 and_expr_var = min_expr <cond_var, and_expr_var>. */
31335 assign_stmt = gimple_build_assign (and_expr_var,
31336 build2 (MIN_EXPR, integer_type_node,
31337 cond_var, and_expr_var));
31339 gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
31340 gimple_set_bb (assign_stmt, new_bb);
31341 gimple_seq_add_stmt (&gseq, assign_stmt);
31345 if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
31346 integer_zero_node,
31347 NULL_TREE, NULL_TREE);
31348 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
31349 gimple_set_bb (if_else_stmt, new_bb);
31350 gimple_seq_add_stmt (&gseq, if_else_stmt);
31352 gimple_seq_add_stmt (&gseq, convert_stmt);
31353 gimple_seq_add_stmt (&gseq, return_stmt);
31354 set_bb_seq (new_bb, gseq);
31356 bb1 = new_bb;
31357 e12 = split_block (bb1, if_else_stmt);
31358 bb2 = e12->dest;
31359 e12->flags &= ~EDGE_FALLTHRU;
31360 e12->flags |= EDGE_TRUE_VALUE;
31362 e23 = split_block (bb2, return_stmt);
31364 gimple_set_bb (convert_stmt, bb2);
31365 gimple_set_bb (return_stmt, bb2);
31367 bb3 = e23->dest;
31368 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
31370 remove_edge (e23);
31371 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
31373 pop_cfun ();
31375 return bb3;
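/* A rough sketch of the GIMPLE this function appends for one version
   (identifiers below are illustrative only):

     cond_1 = __builtin_cpu_is ("core2");     <- one call per predicate
     and_var = MIN_EXPR <cond_1, and_var>;    <- folds several predicates
     if (and_var > 0) goto bb2; else goto bb3;
   bb2:
     result = (void *) &the_version_decl;
     return result;
   bb3:
     ... returned to the caller so the next condition can be chained.  */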
31378 /* This parses the attribute arguments to target in DECL and determines
31379 the right builtin to use to match the platform specification.
31380 It returns the priority value for this version decl. If PREDICATE_LIST
31381 is not NULL, it stores the list of cpu features that need to be checked
31382 before dispatching this function. */
31384 static unsigned int
31385 get_builtin_code_for_version (tree decl, tree *predicate_list)
31387 tree attrs;
31388 struct cl_target_option cur_target;
31389 tree target_node;
31390 struct cl_target_option *new_target;
31391 const char *arg_str = NULL;
31392 const char *attrs_str = NULL;
31393 char *tok_str = NULL;
31394 char *token;
31396 /* Priority of i386 features, greater value is higher priority. This is
31397 used to decide the order in which function dispatch must happen. For
31398 instance, a version specialized for SSE4.2 should be checked for dispatch
31399 before a version for SSE3, as SSE4.2 implies SSE3. */
31400 enum feature_priority
31402 P_ZERO = 0,
31403 P_MMX,
31404 P_SSE,
31405 P_SSE2,
31406 P_SSE3,
31407 P_SSSE3,
31408 P_PROC_SSSE3,
31409 P_SSE4_A,
31410 P_PROC_SSE4_A,
31411 P_SSE4_1,
31412 P_SSE4_2,
31413 P_PROC_SSE4_2,
31414 P_POPCNT,
31415 P_AVX,
31416 P_PROC_AVX,
31417 P_FMA4,
31418 P_XOP,
31419 P_PROC_XOP,
31420 P_FMA,
31421 P_PROC_FMA,
31422 P_AVX2,
31423 P_PROC_AVX2
31426 enum feature_priority priority = P_ZERO;
31428 /* These are the target attribute strings for which a dispatcher is
31429 available, from fold_builtin_cpu. */
31431 static struct _feature_list
31433 const char *const name;
31434 const enum feature_priority priority;
31436 const feature_list[] =
31438 {"mmx", P_MMX},
31439 {"sse", P_SSE},
31440 {"sse2", P_SSE2},
31441 {"sse3", P_SSE3},
31442 {"sse4a", P_SSE4_A},
31443 {"ssse3", P_SSSE3},
31444 {"sse4.1", P_SSE4_1},
31445 {"sse4.2", P_SSE4_2},
31446 {"popcnt", P_POPCNT},
31447 {"avx", P_AVX},
31448 {"fma4", P_FMA4},
31449 {"xop", P_XOP},
31450 {"fma", P_FMA},
31451 {"avx2", P_AVX2}
31455 static unsigned int NUM_FEATURES
31456 = sizeof (feature_list) / sizeof (struct _feature_list);
31458 unsigned int i;
31460 tree predicate_chain = NULL_TREE;
31461 tree predicate_decl, predicate_arg;
31463 attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
31464 gcc_assert (attrs != NULL);
31466 attrs = TREE_VALUE (TREE_VALUE (attrs));
31468 gcc_assert (TREE_CODE (attrs) == STRING_CST);
31469 attrs_str = TREE_STRING_POINTER (attrs);
31471 /* Return priority zero for default function. */
31472 if (strcmp (attrs_str, "default") == 0)
31473 return 0;
31475 /* Handle arch= if specified. For priority, set it to be 1 more than
31476 the best instruction set the processor can handle. For instance, if
31477 there is a version for atom and a version for ssse3 (the highest ISA
31478 priority for atom), the atom version must be checked for dispatch
31479 before the ssse3 version. */
31480 if (strstr (attrs_str, "arch=") != NULL)
31482 cl_target_option_save (&cur_target, &global_options);
31483 target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
31484 &global_options_set);
31486 gcc_assert (target_node);
31487 new_target = TREE_TARGET_OPTION (target_node);
31488 gcc_assert (new_target);
31490 if (new_target->arch_specified && new_target->arch > 0)
31492 switch (new_target->arch)
31494 case PROCESSOR_CORE2:
31495 arg_str = "core2";
31496 priority = P_PROC_SSSE3;
31497 break;
31498 case PROCESSOR_NEHALEM:
31499 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES)
31500 arg_str = "westmere";
31501 else
31502 /* We translate "arch=corei7" and "arch=nehalem" to
31503 "corei7" so that it will be mapped to M_INTEL_COREI7
31504 as cpu type to cover all M_INTEL_COREI7_XXXs. */
31505 arg_str = "corei7";
31506 priority = P_PROC_SSE4_2;
31507 break;
31508 case PROCESSOR_SANDYBRIDGE:
31509 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C)
31510 arg_str = "ivybridge";
31511 else
31512 arg_str = "sandybridge";
31513 priority = P_PROC_AVX;
31514 break;
31515 case PROCESSOR_HASWELL:
31516 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX)
31517 arg_str = "broadwell";
31518 else
31519 arg_str = "haswell";
31520 priority = P_PROC_AVX2;
31521 break;
31522 case PROCESSOR_BONNELL:
31523 arg_str = "bonnell";
31524 priority = P_PROC_SSSE3;
31525 break;
31526 case PROCESSOR_SILVERMONT:
31527 arg_str = "silvermont";
31528 priority = P_PROC_SSE4_2;
31529 break;
31530 case PROCESSOR_AMDFAM10:
31531 arg_str = "amdfam10h";
31532 priority = P_PROC_SSE4_A;
31533 break;
31534 case PROCESSOR_BTVER1:
31535 arg_str = "btver1";
31536 priority = P_PROC_SSE4_A;
31537 break;
31538 case PROCESSOR_BTVER2:
31539 arg_str = "btver2";
31540 priority = P_PROC_AVX;
31541 break;
31542 case PROCESSOR_BDVER1:
31543 arg_str = "bdver1";
31544 priority = P_PROC_XOP;
31545 break;
31546 case PROCESSOR_BDVER2:
31547 arg_str = "bdver2";
31548 priority = P_PROC_FMA;
31549 break;
31550 case PROCESSOR_BDVER3:
31551 arg_str = "bdver3";
31552 priority = P_PROC_FMA;
31553 break;
31554 case PROCESSOR_BDVER4:
31555 arg_str = "bdver4";
31556 priority = P_PROC_AVX2;
31557 break;
31561 cl_target_option_restore (&global_options, &cur_target);
31563 if (predicate_list && arg_str == NULL)
31565 error_at (DECL_SOURCE_LOCATION (decl),
31566 "No dispatcher found for the versioning attributes");
31567 return 0;
31570 if (predicate_list)
31572 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
31573 /* For a C string literal the length includes the trailing NULL. */
31574 predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
31575 predicate_chain = tree_cons (predicate_decl, predicate_arg,
31576 predicate_chain);
31580 /* Process feature name. */
31581 tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
31582 strcpy (tok_str, attrs_str);
31583 token = strtok (tok_str, ",");
31584 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];
31586 while (token != NULL)
31588 /* Do not process "arch=" */
31589 if (strncmp (token, "arch=", 5) == 0)
31591 token = strtok (NULL, ",");
31592 continue;
31594 for (i = 0; i < NUM_FEATURES; ++i)
31596 if (strcmp (token, feature_list[i].name) == 0)
31598 if (predicate_list)
31600 predicate_arg = build_string_literal (
31601 strlen (feature_list[i].name) + 1,
31602 feature_list[i].name);
31603 predicate_chain = tree_cons (predicate_decl, predicate_arg,
31604 predicate_chain);
31606 /* Find the maximum priority feature. */
31607 if (feature_list[i].priority > priority)
31608 priority = feature_list[i].priority;
31610 break;
31613 if (predicate_list && i == NUM_FEATURES)
31615 error_at (DECL_SOURCE_LOCATION (decl),
31616 "No dispatcher found for %s", token);
31617 return 0;
31619 token = strtok (NULL, ",");
31621 free (tok_str);
31623 if (predicate_list && predicate_chain == NULL_TREE)
31625 error_at (DECL_SOURCE_LOCATION (decl),
31626 "No dispatcher found for the versioning attributes : %s",
31627 attrs_str);
31628 return 0;
31630 else if (predicate_list)
31632 predicate_chain = nreverse (predicate_chain);
31633 *predicate_list = predicate_chain;
31636 return priority;
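/* Worked example (hypothetical function versions): a version declared with
   __attribute__ ((target ("sse4.2"))) yields a predicate pair for
   __builtin_cpu_supports ("sse4.2") and priority P_SSE4_2, while a version
   with target ("arch=core2") yields __builtin_cpu_is ("core2") and priority
   P_PROC_SSSE3 -- higher than a plain "ssse3" version, so the core2 version
   is tested first by the dispatcher.  */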
31639 /* This compares the priority of target features in function DECL1
31640 and DECL2. It returns positive value if DECL1 is higher priority,
31641 negative value if DECL2 is higher priority and 0 if they are the
31642 same. */
31644 static int
31645 ix86_compare_version_priority (tree decl1, tree decl2)
31647 unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
31648 unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
31650 return (int)priority1 - (int)priority2;
31653 /* V1 and V2 point to function versions with different priorities
31654 based on the target ISA. This function compares their priorities. */
31656 static int
31657 feature_compare (const void *v1, const void *v2)
31659 typedef struct _function_version_info
31661 tree version_decl;
31662 tree predicate_chain;
31663 unsigned int dispatch_priority;
31664 } function_version_info;
31666 const function_version_info c1 = *(const function_version_info *)v1;
31667 const function_version_info c2 = *(const function_version_info *)v2;
31668 return (c2.dispatch_priority - c1.dispatch_priority);
31671 /* This function generates the dispatch function for
31672 multi-versioned functions. DISPATCH_DECL is the function which will
31673 contain the dispatch logic. FNDECLS are the function choices for
31674 dispatch, and is a tree chain. EMPTY_BB is the basic block pointer
31675 in DISPATCH_DECL in which the dispatch code is generated. */
31677 static int
31678 dispatch_function_versions (tree dispatch_decl,
31679 void *fndecls_p,
31680 basic_block *empty_bb)
31682 tree default_decl;
31683 gimple ifunc_cpu_init_stmt;
31684 gimple_seq gseq;
31685 int ix;
31686 tree ele;
31687 vec<tree> *fndecls;
31688 unsigned int num_versions = 0;
31689 unsigned int actual_versions = 0;
31690 unsigned int i;
31692 struct _function_version_info
31694 tree version_decl;
31695 tree predicate_chain;
31696 unsigned int dispatch_priority;
31697 }*function_version_info;
31699 gcc_assert (dispatch_decl != NULL
31700 && fndecls_p != NULL
31701 && empty_bb != NULL);
31703 /* fndecls_p is actually a vector. */
31704 fndecls = static_cast<vec<tree> *> (fndecls_p);
31706 /* At least one more version other than the default. */
31707 num_versions = fndecls->length ();
31708 gcc_assert (num_versions >= 2);
31710 function_version_info = (struct _function_version_info *)
31711 XNEWVEC (struct _function_version_info, (num_versions - 1));
31713 /* The first version in the vector is the default decl. */
31714 default_decl = (*fndecls)[0];
31716 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
31718 gseq = bb_seq (*empty_bb);
31719 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
31720 constructors, so explicitly call __builtin_cpu_init here. */
31721 ifunc_cpu_init_stmt = gimple_build_call_vec (
31722 ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
31723 gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
31724 gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
31725 set_bb_seq (*empty_bb, gseq);
31727 pop_cfun ();
31730 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
31732 tree version_decl = ele;
31733 tree predicate_chain = NULL_TREE;
31734 unsigned int priority;
31735 /* Get attribute string, parse it and find the right predicate decl.
31736 The predicate function could be a lengthy combination of many
31737 features, like arch-type and various isa-variants. */
31738 priority = get_builtin_code_for_version (version_decl,
31739 &predicate_chain);
31741 if (predicate_chain == NULL_TREE)
31742 continue;
31744 function_version_info [actual_versions].version_decl = version_decl;
31745 function_version_info [actual_versions].predicate_chain
31746 = predicate_chain;
31747 function_version_info [actual_versions].dispatch_priority = priority;
31748 actual_versions++;
31751 /* Sort the versions according to descending order of dispatch priority. The
31752 priority is based on the ISA. This is not a perfect solution. There
31753 could still be ambiguity. If more than one function version is suitable
31754 to execute, which one should be dispatched? In the future, allow the user
31755 to specify a dispatch priority next to the version. */
31756 qsort (function_version_info, actual_versions,
31757 sizeof (struct _function_version_info), feature_compare);
31759 for (i = 0; i < actual_versions; ++i)
31760 *empty_bb = add_condition_to_bb (dispatch_decl,
31761 function_version_info[i].version_decl,
31762 function_version_info[i].predicate_chain,
31763 *empty_bb);
31765 /* dispatch default version at the end. */
31766 *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
31767 NULL, *empty_bb);
31769 free (function_version_info);
31770 return 0;
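/* Putting the pieces together, the resolver body built here looks roughly
   like this (sketch only, with made-up version names):

     __builtin_cpu_init ();
     if (__builtin_cpu_is ("core2")) return &foo.arch_core2;
     if (__builtin_cpu_supports ("avx")) return &foo.avx;
     return &foo;     <- the default version, dispatched last

   with the tests emitted in decreasing dispatch priority.  */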
31773 /* Comparator function to be used in qsort routine to sort attribute
31774 specification strings to "target". */
31776 static int
31777 attr_strcmp (const void *v1, const void *v2)
31779 const char *c1 = *(char *const*)v1;
31780 const char *c2 = *(char *const*)v2;
31781 return strcmp (c1, c2);
31784 /* ARGLIST is the argument to target attribute. This function tokenizes
31785 the comma separated arguments, sorts them and returns a string which
31786 is a unique identifier for the comma separated arguments. It also
31787 replaces non-identifier characters "=,-" with "_". */
31789 static char *
31790 sorted_attr_string (tree arglist)
31792 tree arg;
31793 size_t str_len_sum = 0;
31794 char **args = NULL;
31795 char *attr_str, *ret_str;
31796 char *attr = NULL;
31797 unsigned int argnum = 1;
31798 unsigned int i;
31800 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
31802 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
31803 size_t len = strlen (str);
31804 str_len_sum += len + 1;
31805 if (arg != arglist)
31806 argnum++;
31807 for (i = 0; i < strlen (str); i++)
31808 if (str[i] == ',')
31809 argnum++;
31812 attr_str = XNEWVEC (char, str_len_sum);
31813 str_len_sum = 0;
31814 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
31816 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
31817 size_t len = strlen (str);
31818 memcpy (attr_str + str_len_sum, str, len);
31819 attr_str[str_len_sum + len] = TREE_CHAIN (arg) ? ',' : '\0';
31820 str_len_sum += len + 1;
31823 /* Replace "=,-" with "_". */
31824 for (i = 0; i < strlen (attr_str); i++)
31825 if (attr_str[i] == '=' || attr_str[i]== '-')
31826 attr_str[i] = '_';
31828 if (argnum == 1)
31829 return attr_str;
31831 args = XNEWVEC (char *, argnum);
31833 i = 0;
31834 attr = strtok (attr_str, ",");
31835 while (attr != NULL)
31837 args[i] = attr;
31838 i++;
31839 attr = strtok (NULL, ",");
31842 qsort (args, argnum, sizeof (char *), attr_strcmp);
31844 ret_str = XNEWVEC (char, str_len_sum);
31845 str_len_sum = 0;
31846 for (i = 0; i < argnum; i++)
31848 size_t len = strlen (args[i]);
31849 memcpy (ret_str + str_len_sum, args[i], len);
31850 ret_str[str_len_sum + len] = i < argnum - 1 ? '_' : '\0';
31851 str_len_sum += len + 1;
31854 XDELETEVEC (args);
31855 XDELETEVEC (attr_str);
31856 return ret_str;
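/* Example (hypothetical attribute): for
   __attribute__ ((target ("avx,arch=corei7"))) the string "avx,arch=corei7"
   becomes "avx,arch_corei7" after the '=' substitution, the tokens sort to
   { "arch_corei7", "avx" }, and the returned identifier is
   "arch_corei7_avx".  */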
31859 /* This function changes the assembler name for functions that are
31860 versions. If DECL is a function version and has a "target"
31861 attribute, it appends the attribute string to its assembler name. */
31863 static tree
31864 ix86_mangle_function_version_assembler_name (tree decl, tree id)
31866 tree version_attr;
31867 const char *orig_name, *version_string;
31868 char *attr_str, *assembler_name;
31870 if (DECL_DECLARED_INLINE_P (decl)
31871 && lookup_attribute ("gnu_inline",
31872 DECL_ATTRIBUTES (decl)))
31873 error_at (DECL_SOURCE_LOCATION (decl),
31874 "Function versions cannot be marked as gnu_inline,"
31875 " bodies have to be generated");
31877 if (DECL_VIRTUAL_P (decl)
31878 || DECL_VINDEX (decl))
31879 sorry ("Virtual function multiversioning not supported");
31881 version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
31883 /* target attribute string cannot be NULL. */
31884 gcc_assert (version_attr != NULL_TREE);
31886 orig_name = IDENTIFIER_POINTER (id);
31887 version_string
31888 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
31890 if (strcmp (version_string, "default") == 0)
31891 return id;
31893 attr_str = sorted_attr_string (TREE_VALUE (version_attr));
31894 assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
31896 sprintf (assembler_name, "%s.%s", orig_name, attr_str);
31898 /* Allow assembler name to be modified if already set. */
31899 if (DECL_ASSEMBLER_NAME_SET_P (decl))
31900 SET_DECL_RTL (decl, NULL);
31902 tree ret = get_identifier (assembler_name);
31903 XDELETEVEC (attr_str);
31904 XDELETEVEC (assembler_name);
31905 return ret;
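/* Example (hypothetical function "foo"): the version of foo declared with
   target ("arch=core2") gets the assembler name "foo.arch_core2", while the
   "default" version keeps its original assembler name unchanged.  */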
31908 /* This function returns true if FN1 and FN2 are versions of the same function,
31909 that is, the target strings of the function decls are different. This assumes
31910 that FN1 and FN2 have the same signature. */
31912 static bool
31913 ix86_function_versions (tree fn1, tree fn2)
31915 tree attr1, attr2;
31916 char *target1, *target2;
31917 bool result;
31919 if (TREE_CODE (fn1) != FUNCTION_DECL
31920 || TREE_CODE (fn2) != FUNCTION_DECL)
31921 return false;
31923 attr1 = lookup_attribute ("target", DECL_ATTRIBUTES (fn1));
31924 attr2 = lookup_attribute ("target", DECL_ATTRIBUTES (fn2));
31926 /* At least one function decl should have the target attribute specified. */
31927 if (attr1 == NULL_TREE && attr2 == NULL_TREE)
31928 return false;
31930 /* Diagnose missing target attribute if one of the decls is already
31931 multi-versioned. */
31932 if (attr1 == NULL_TREE || attr2 == NULL_TREE)
31934 if (DECL_FUNCTION_VERSIONED (fn1) || DECL_FUNCTION_VERSIONED (fn2))
31936 if (attr2 != NULL_TREE)
31938 tree tem = fn1;
31939 fn1 = fn2;
31940 fn2 = tem;
31941 attr1 = attr2;
31943 error_at (DECL_SOURCE_LOCATION (fn2),
31944 "missing %<target%> attribute for multi-versioned %D",
31945 fn2);
31946 inform (DECL_SOURCE_LOCATION (fn1),
31947 "previous declaration of %D", fn1);
31948 /* Prevent diagnosing of the same error multiple times. */
31949 DECL_ATTRIBUTES (fn2)
31950 = tree_cons (get_identifier ("target"),
31951 copy_node (TREE_VALUE (attr1)),
31952 DECL_ATTRIBUTES (fn2));
31954 return false;
31957 target1 = sorted_attr_string (TREE_VALUE (attr1));
31958 target2 = sorted_attr_string (TREE_VALUE (attr2));
31960 /* The sorted target strings must be different for fn1 and fn2
31961 to be versions. */
31962 if (strcmp (target1, target2) == 0)
31963 result = false;
31964 else
31965 result = true;
31967 XDELETEVEC (target1);
31968 XDELETEVEC (target2);
31970 return result;
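/* Example (hypothetical decls): two declarations of foo carrying
   target ("sse4.2") and target ("avx") are treated as distinct versions,
   whereas target ("avx,popcnt") and target ("popcnt,avx") are not, since
   their sorted attribute strings compare equal.  */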
31973 static tree
31974 ix86_mangle_decl_assembler_name (tree decl, tree id)
31976 /* For function version, add the target suffix to the assembler name. */
31977 if (TREE_CODE (decl) == FUNCTION_DECL
31978 && DECL_FUNCTION_VERSIONED (decl))
31979 id = ix86_mangle_function_version_assembler_name (decl, id);
31980 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
31981 id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
31982 #endif
31984 return id;
31987 /* Return a new name by appending SUFFIX to the DECL name. If make_unique
31988 is true, append the full path name of the source file. */
31990 static char *
31991 make_name (tree decl, const char *suffix, bool make_unique)
31993 char *global_var_name;
31994 int name_len;
31995 const char *name;
31996 const char *unique_name = NULL;
31998 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
32000 /* Get a unique name that can be used globally without any chances
32001 of collision at link time. */
32002 if (make_unique)
32003 unique_name = IDENTIFIER_POINTER (get_file_function_name ("\0"));
32005 name_len = strlen (name) + strlen (suffix) + 2;
32007 if (make_unique)
32008 name_len += strlen (unique_name) + 1;
32009 global_var_name = XNEWVEC (char, name_len);
32011 /* Use '.' to concatenate names as it is demangler friendly. */
32012 if (make_unique)
32013 snprintf (global_var_name, name_len, "%s.%s.%s", name, unique_name,
32014 suffix);
32015 else
32016 snprintf (global_var_name, name_len, "%s.%s", name, suffix);
32018 return global_var_name;
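/* Example (hypothetical decl "foo"): make_name (foo, "resolver", false)
   returns "foo.resolver"; with MAKE_UNIQUE true the file-scope unique name
   from get_file_function_name is spliced in as well, giving something of
   the form "foo.<unique>.resolver".  */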
32021 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
32023 /* Make a dispatcher declaration for the multi-versioned function DECL.
32024 Calls to DECL function will be replaced with calls to the dispatcher
32025 by the front-end. Return the decl created. */
32027 static tree
32028 make_dispatcher_decl (const tree decl)
32030 tree func_decl;
32031 char *func_name;
32032 tree fn_type, func_type;
32033 bool is_uniq = false;
32035 if (TREE_PUBLIC (decl) == 0)
32036 is_uniq = true;
32038 func_name = make_name (decl, "ifunc", is_uniq);
32040 fn_type = TREE_TYPE (decl);
32041 func_type = build_function_type (TREE_TYPE (fn_type),
32042 TYPE_ARG_TYPES (fn_type));
32044 func_decl = build_fn_decl (func_name, func_type);
32045 XDELETEVEC (func_name);
32046 TREE_USED (func_decl) = 1;
32047 DECL_CONTEXT (func_decl) = NULL_TREE;
32048 DECL_INITIAL (func_decl) = error_mark_node;
32049 DECL_ARTIFICIAL (func_decl) = 1;
32050 /* Mark this func as external, the resolver will flip it again if
32051 it gets generated. */
32052 DECL_EXTERNAL (func_decl) = 1;
32053 /* This will be an IFUNC, and IFUNCs have to be externally visible. */
32054 TREE_PUBLIC (func_decl) = 1;
32056 return func_decl;
32059 #endif
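/* Continuing the example above: for a public default decl "foo",
   make_dispatcher_decl produces an external, artificial function decl named
   "foo.ifunc" (via make_name with the "ifunc" suffix); when foo is not
   TREE_PUBLIC the unique file name is folded into that name as well.  */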
32061 /* Returns true if DECL is multi-versioned and is the default function,
32062 that is, it is not tagged with a target-specific optimization. */
32064 static bool
32065 is_function_default_version (const tree decl)
32067 if (TREE_CODE (decl) != FUNCTION_DECL
32068 || !DECL_FUNCTION_VERSIONED (decl))
32069 return false;
32070 tree attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
32071 gcc_assert (attr);
32072 attr = TREE_VALUE (TREE_VALUE (attr));
32073 return (TREE_CODE (attr) == STRING_CST
32074 && strcmp (TREE_STRING_POINTER (attr), "default") == 0);
32077 /* Make a dispatcher declaration for the multi-versioned function DECL.
32078 Calls to DECL function will be replaced with calls to the dispatcher
32079 by the front-end. Returns the decl of the dispatcher function. */
32081 static tree
32082 ix86_get_function_versions_dispatcher (void *decl)
32084 tree fn = (tree) decl;
32085 struct cgraph_node *node = NULL;
32086 struct cgraph_node *default_node = NULL;
32087 struct cgraph_function_version_info *node_v = NULL;
32088 struct cgraph_function_version_info *first_v = NULL;
32090 tree dispatch_decl = NULL;
32092 struct cgraph_function_version_info *default_version_info = NULL;
32094 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
32096 node = cgraph_node::get (fn);
32097 gcc_assert (node != NULL);
32099 node_v = node->function_version ();
32100 gcc_assert (node_v != NULL);
32102 if (node_v->dispatcher_resolver != NULL)
32103 return node_v->dispatcher_resolver;
32105 /* Find the default version and make it the first node. */
32106 first_v = node_v;
32107 /* Go to the beginning of the chain. */
32108 while (first_v->prev != NULL)
32109 first_v = first_v->prev;
32110 default_version_info = first_v;
32111 while (default_version_info != NULL)
32113 if (is_function_default_version
32114 (default_version_info->this_node->decl))
32115 break;
32116 default_version_info = default_version_info->next;
32119 /* If there is no default node, just return NULL. */
32120 if (default_version_info == NULL)
32121 return NULL;
32123 /* Make default info the first node. */
32124 if (first_v != default_version_info)
32126 default_version_info->prev->next = default_version_info->next;
32127 if (default_version_info->next)
32128 default_version_info->next->prev = default_version_info->prev;
32129 first_v->prev = default_version_info;
32130 default_version_info->next = first_v;
32131 default_version_info->prev = NULL;
32134 default_node = default_version_info->this_node;
32136 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
32137 if (targetm.has_ifunc_p ())
32139 struct cgraph_function_version_info *it_v = NULL;
32140 struct cgraph_node *dispatcher_node = NULL;
32141 struct cgraph_function_version_info *dispatcher_version_info = NULL;
32143 /* Right now, the dispatching is done via ifunc. */
32144 dispatch_decl = make_dispatcher_decl (default_node->decl);
32146 dispatcher_node = cgraph_node::get_create (dispatch_decl);
32147 gcc_assert (dispatcher_node != NULL);
32148 dispatcher_node->dispatcher_function = 1;
32149 dispatcher_version_info
32150 = dispatcher_node->insert_new_function_version ();
32151 dispatcher_version_info->next = default_version_info;
32152 dispatcher_node->definition = 1;
32154 /* Set the dispatcher for all the versions. */
32155 it_v = default_version_info;
32156 while (it_v != NULL)
32158 it_v->dispatcher_resolver = dispatch_decl;
32159 it_v = it_v->next;
32162 else
32163 #endif
32165 error_at (DECL_SOURCE_LOCATION (default_node->decl),
32166 "multiversioning needs ifunc which is not supported "
32167 "on this target");
32170 return dispatch_decl;
32173 /* Makes a function attribute of the form NAME(ARG_NAME) and chains
32174 it to CHAIN. */
32176 static tree
32177 make_attribute (const char *name, const char *arg_name, tree chain)
32179 tree attr_name;
32180 tree attr_arg_name;
32181 tree attr_args;
32182 tree attr;
32184 attr_name = get_identifier (name);
32185 attr_arg_name = build_string (strlen (arg_name), arg_name);
32186 attr_args = tree_cons (NULL_TREE, attr_arg_name, NULL_TREE);
32187 attr = tree_cons (attr_name, attr_args, chain);
32188 return attr;
32191 /* Make the resolver function decl to dispatch the versions of
32192 a multi-versioned function, DEFAULT_DECL. Create an
32193 empty basic block in the resolver and store the pointer in
32194 EMPTY_BB. Return the decl of the resolver function. */
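/* Conceptually (illustrative), for a public versioned function "foo" the
   dispatcher created by make_dispatcher_decl ends up carrying

     __attribute__ ((ifunc ("foo.resolver")))

   where "foo.resolver" is the resolver decl built by this function.  */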
32196 static tree
32197 make_resolver_func (const tree default_decl,
32198 const tree dispatch_decl,
32199 basic_block *empty_bb)
32201 char *resolver_name;
32202 tree decl, type, decl_name, t;
32203 bool is_uniq = false;
32205 /* IFUNCs have to be globally visible. So, if the default_decl is
32206 not, then the name of the IFUNC should be made unique. */
32207 if (TREE_PUBLIC (default_decl) == 0)
32208 is_uniq = true;
32210 /* Append the filename to the resolver function if the versions are
32211 not externally visible. This is because the resolver function has
32212 to be externally visible for the loader to find it. So, appending
32213 the filename will prevent conflicts with a resolver function from
32214 another module which is based on the same version name. */
32215 resolver_name = make_name (default_decl, "resolver", is_uniq);
32217 /* The resolver function should return a (void *). */
32218 type = build_function_type_list (ptr_type_node, NULL_TREE);
32220 decl = build_fn_decl (resolver_name, type);
32221 decl_name = get_identifier (resolver_name);
32222 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
32224 DECL_NAME (decl) = decl_name;
32225 TREE_USED (decl) = 1;
32226 DECL_ARTIFICIAL (decl) = 1;
32227 DECL_IGNORED_P (decl) = 0;
32228 /* IFUNC resolvers have to be externally visible. */
32229 TREE_PUBLIC (decl) = 1;
32230 DECL_UNINLINABLE (decl) = 1;
32232 /* Resolver is not external, body is generated. */
32233 DECL_EXTERNAL (decl) = 0;
32234 DECL_EXTERNAL (dispatch_decl) = 0;
32236 DECL_CONTEXT (decl) = NULL_TREE;
32237 DECL_INITIAL (decl) = make_node (BLOCK);
32238 DECL_STATIC_CONSTRUCTOR (decl) = 0;
32240 if (DECL_COMDAT_GROUP (default_decl)
32241 || TREE_PUBLIC (default_decl))
32243 /* In this case, each translation unit with a call to this
32244 versioned function will put out a resolver. Ensure it
32245 is comdat to keep just one copy. */
32246 DECL_COMDAT (decl) = 1;
32247 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
32249 /* Build result decl and add to function_decl. */
32250 t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
32251 DECL_ARTIFICIAL (t) = 1;
32252 DECL_IGNORED_P (t) = 1;
32253 DECL_RESULT (decl) = t;
32255 gimplify_function_tree (decl);
32256 push_cfun (DECL_STRUCT_FUNCTION (decl));
32257 *empty_bb = init_lowered_empty_function (decl, false);
32259 cgraph_node::add_new_function (decl, true);
32260 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
32262 pop_cfun ();
32264 gcc_assert (dispatch_decl != NULL);
32265 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
32266 DECL_ATTRIBUTES (dispatch_decl)
32267 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
32269 /* Create the alias for dispatch to resolver here. */
32270 /*cgraph_create_function_alias (dispatch_decl, decl);*/
32271 cgraph_node::create_same_body_alias (dispatch_decl, decl);
32272 XDELETEVEC (resolver_name);
32273 return decl;
32276 /* Generate the dispatching code body to dispatch multi-versioned function
32277 DECL. The target hook is called to process the "target" attributes and
32278 provide the code to dispatch the right function at run-time. NODE points
32279 to the dispatcher decl whose body will be created. */
32281 static tree
32282 ix86_generate_version_dispatcher_body (void *node_p)
32284 tree resolver_decl;
32285 basic_block empty_bb;
32286 tree default_ver_decl;
32287 struct cgraph_node *versn;
32288 struct cgraph_node *node;
32290 struct cgraph_function_version_info *node_version_info = NULL;
32291 struct cgraph_function_version_info *versn_info = NULL;
32293 node = (cgraph_node *)node_p;
32295 node_version_info = node->function_version ();
32296 gcc_assert (node->dispatcher_function
32297 && node_version_info != NULL);
32299 if (node_version_info->dispatcher_resolver)
32300 return node_version_info->dispatcher_resolver;
32302 /* The first version in the chain corresponds to the default version. */
32303 default_ver_decl = node_version_info->next->this_node->decl;
32305 /* node is going to be an alias, so remove the finalized bit. */
32306 node->definition = false;
32308 resolver_decl = make_resolver_func (default_ver_decl,
32309 node->decl, &empty_bb);
32311 node_version_info->dispatcher_resolver = resolver_decl;
32313 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
32315 auto_vec<tree, 2> fn_ver_vec;
32317 for (versn_info = node_version_info->next; versn_info;
32318 versn_info = versn_info->next)
32320 versn = versn_info->this_node;
32321 /* Check for virtual functions here again, as by this time it should
32322 have been determined if this function needs a vtable index or
32323 not. This happens for methods in derived classes that override
32324 virtual methods in base classes but are not explicitly marked as
32325 virtual. */
32326 if (DECL_VINDEX (versn->decl))
32327 sorry ("Virtual function multiversioning not supported");
32329 fn_ver_vec.safe_push (versn->decl);
32332 dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
32333 cgraph_edge::rebuild_edges ();
32334 pop_cfun ();
32335 return resolver_decl;
32337 /* This builds the processor_model struct type defined in
32338 libgcc/config/i386/cpuinfo.c */
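/* A sketch of the struct being mirrored (declared in
   libgcc/config/i386/cpuinfo.c):

     struct __processor_model
     {
       unsigned int __cpu_vendor;
       unsigned int __cpu_type;
       unsigned int __cpu_subtype;
       unsigned int __cpu_features[1];
     };  */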
32340 static tree
32341 build_processor_model_struct (void)
32343 const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
32344 "__cpu_features"};
32345 tree field = NULL_TREE, field_chain = NULL_TREE;
32346 int i;
32347 tree type = make_node (RECORD_TYPE);
32349 /* The first 3 fields are unsigned int. */
32350 for (i = 0; i < 3; ++i)
32352 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
32353 get_identifier (field_name[i]), unsigned_type_node);
32354 if (field_chain != NULL_TREE)
32355 DECL_CHAIN (field) = field_chain;
32356 field_chain = field;
32359 /* The last field is an array of unsigned integers of size one. */
32360 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
32361 get_identifier (field_name[3]),
32362 build_array_type (unsigned_type_node,
32363 build_index_type (size_one_node)));
32364 if (field_chain != NULL_TREE)
32365 DECL_CHAIN (field) = field_chain;
32366 field_chain = field;
32368 finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
32369 return type;
32372 /* Returns an extern, comdat VAR_DECL of type TYPE and name NAME. */
32374 static tree
32375 make_var_decl (tree type, const char *name)
32377 tree new_decl;
32379 new_decl = build_decl (UNKNOWN_LOCATION,
32380 VAR_DECL,
32381 get_identifier (name),
32382 type);
32384 DECL_EXTERNAL (new_decl) = 1;
32385 TREE_STATIC (new_decl) = 1;
32386 TREE_PUBLIC (new_decl) = 1;
32387 DECL_INITIAL (new_decl) = 0;
32388 DECL_ARTIFICIAL (new_decl) = 0;
32389 DECL_PRESERVE_P (new_decl) = 1;
32391 make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
32392 assemble_variable (new_decl, 0, 0, 0);
32394 return new_decl;
32397 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call; fold it
32398 into an integer check against the __cpu_model data defined in libgcc/config/i386/cpuinfo.c. */
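/* For instance (illustrative), __builtin_cpu_is ("amd") folds to roughly

     (int) (__cpu_model.__cpu_vendor == M_AMD)

   and __builtin_cpu_supports ("avx") folds to roughly

     (int) (__cpu_model.__cpu_features[0] & (1 << F_AVX))  */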
32400 static tree
32401 fold_builtin_cpu (tree fndecl, tree *args)
32403 unsigned int i;
32404 enum ix86_builtins fn_code = (enum ix86_builtins)
32405 DECL_FUNCTION_CODE (fndecl);
32406 tree param_string_cst = NULL;
32408 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
32409 enum processor_features
32411 F_CMOV = 0,
32412 F_MMX,
32413 F_POPCNT,
32414 F_SSE,
32415 F_SSE2,
32416 F_SSE3,
32417 F_SSSE3,
32418 F_SSE4_1,
32419 F_SSE4_2,
32420 F_AVX,
32421 F_AVX2,
32422 F_SSE4_A,
32423 F_FMA4,
32424 F_XOP,
32425 F_FMA,
32426 F_MAX
32429 /* These are the values for vendor types and cpu types and subtypes
32430 in cpuinfo.c. Cpu types and subtypes are biased by the corresponding
32431 start value, which must be subtracted before use. */
32432 enum processor_model
32434 M_INTEL = 1,
32435 M_AMD,
32436 M_CPU_TYPE_START,
32437 M_INTEL_BONNELL,
32438 M_INTEL_CORE2,
32439 M_INTEL_COREI7,
32440 M_AMDFAM10H,
32441 M_AMDFAM15H,
32442 M_INTEL_SILVERMONT,
32443 M_AMD_BTVER1,
32444 M_AMD_BTVER2,
32445 M_CPU_SUBTYPE_START,
32446 M_INTEL_COREI7_NEHALEM,
32447 M_INTEL_COREI7_WESTMERE,
32448 M_INTEL_COREI7_SANDYBRIDGE,
32449 M_AMDFAM10H_BARCELONA,
32450 M_AMDFAM10H_SHANGHAI,
32451 M_AMDFAM10H_ISTANBUL,
32452 M_AMDFAM15H_BDVER1,
32453 M_AMDFAM15H_BDVER2,
32454 M_AMDFAM15H_BDVER3,
32455 M_AMDFAM15H_BDVER4,
32456 M_INTEL_COREI7_IVYBRIDGE,
32457 M_INTEL_COREI7_HASWELL
32460 static struct _arch_names_table
32462 const char *const name;
32463 const enum processor_model model;
32465 const arch_names_table[] =
32467 {"amd", M_AMD},
32468 {"intel", M_INTEL},
32469 {"atom", M_INTEL_BONNELL},
32470 {"slm", M_INTEL_SILVERMONT},
32471 {"core2", M_INTEL_CORE2},
32472 {"corei7", M_INTEL_COREI7},
32473 {"nehalem", M_INTEL_COREI7_NEHALEM},
32474 {"westmere", M_INTEL_COREI7_WESTMERE},
32475 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
32476 {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
32477 {"haswell", M_INTEL_COREI7_HASWELL},
32478 {"bonnell", M_INTEL_BONNELL},
32479 {"silvermont", M_INTEL_SILVERMONT},
32480 {"amdfam10h", M_AMDFAM10H},
32481 {"barcelona", M_AMDFAM10H_BARCELONA},
32482 {"shanghai", M_AMDFAM10H_SHANGHAI},
32483 {"istanbul", M_AMDFAM10H_ISTANBUL},
32484 {"btver1", M_AMD_BTVER1},
32485 {"amdfam15h", M_AMDFAM15H},
32486 {"bdver1", M_AMDFAM15H_BDVER1},
32487 {"bdver2", M_AMDFAM15H_BDVER2},
32488 {"bdver3", M_AMDFAM15H_BDVER3},
32489 {"bdver4", M_AMDFAM15H_BDVER4},
32490 {"btver2", M_AMD_BTVER2},
32493 static struct _isa_names_table
32495 const char *const name;
32496 const enum processor_features feature;
32498 const isa_names_table[] =
32500 {"cmov", F_CMOV},
32501 {"mmx", F_MMX},
32502 {"popcnt", F_POPCNT},
32503 {"sse", F_SSE},
32504 {"sse2", F_SSE2},
32505 {"sse3", F_SSE3},
32506 {"ssse3", F_SSSE3},
32507 {"sse4a", F_SSE4_A},
32508 {"sse4.1", F_SSE4_1},
32509 {"sse4.2", F_SSE4_2},
32510 {"avx", F_AVX},
32511 {"fma4", F_FMA4},
32512 {"xop", F_XOP},
32513 {"fma", F_FMA},
32514 {"avx2", F_AVX2}
32517 tree __processor_model_type = build_processor_model_struct ();
32518 tree __cpu_model_var = make_var_decl (__processor_model_type,
32519 "__cpu_model");
32522 varpool_node::add (__cpu_model_var);
32524 gcc_assert ((args != NULL) && (*args != NULL));
32526 param_string_cst = *args;
32527 while (param_string_cst
32528 && TREE_CODE (param_string_cst) != STRING_CST)
32530 /* *args must be an expr that can contain other EXPRs leading to a
32531 STRING_CST. */
32532 if (!EXPR_P (param_string_cst))
32534 error ("Parameter to builtin must be a string constant or literal");
32535 return integer_zero_node;
32537 param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
32540 gcc_assert (param_string_cst);
32542 if (fn_code == IX86_BUILTIN_CPU_IS)
32544 tree ref;
32545 tree field;
32546 tree final;
32548 unsigned int field_val = 0;
32549 unsigned int NUM_ARCH_NAMES
32550 = sizeof (arch_names_table) / sizeof (struct _arch_names_table);
32552 for (i = 0; i < NUM_ARCH_NAMES; i++)
32553 if (strcmp (arch_names_table[i].name,
32554 TREE_STRING_POINTER (param_string_cst)) == 0)
32555 break;
32557 if (i == NUM_ARCH_NAMES)
32559 error ("Parameter to builtin not valid: %s",
32560 TREE_STRING_POINTER (param_string_cst));
32561 return integer_zero_node;
32564 field = TYPE_FIELDS (__processor_model_type);
32565 field_val = arch_names_table[i].model;
32567 /* CPU types are stored in the next field. */
32568 if (field_val > M_CPU_TYPE_START
32569 && field_val < M_CPU_SUBTYPE_START)
32571 field = DECL_CHAIN (field);
32572 field_val -= M_CPU_TYPE_START;
32575 /* CPU subtypes are stored in the next field. */
32576 if (field_val > M_CPU_SUBTYPE_START)
32578 field = DECL_CHAIN (DECL_CHAIN (field));
32579 field_val -= M_CPU_SUBTYPE_START;
32582 /* Get the appropriate field in __cpu_model. */
32583 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
32584 field, NULL_TREE);
32586 /* Check the value. */
32587 final = build2 (EQ_EXPR, unsigned_type_node, ref,
32588 build_int_cstu (unsigned_type_node, field_val));
32589 return build1 (CONVERT_EXPR, integer_type_node, final);
32591 else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
32593 tree ref;
32594 tree array_elt;
32595 tree field;
32596 tree final;
32598 unsigned int field_val = 0;
32599 unsigned int NUM_ISA_NAMES
32600 = sizeof (isa_names_table) / sizeof (struct _isa_names_table);
32602 for (i = 0; i < NUM_ISA_NAMES; i++)
32603 if (strcmp (isa_names_table[i].name,
32604 TREE_STRING_POINTER (param_string_cst)) == 0)
32605 break;
32607 if (i == NUM_ISA_NAMES)
32609 error ("Parameter to builtin not valid: %s",
32610 TREE_STRING_POINTER (param_string_cst));
32611 return integer_zero_node;
32614 field = TYPE_FIELDS (__processor_model_type);
32615 /* Get the last field, which is __cpu_features. */
32616 while (DECL_CHAIN (field))
32617 field = DECL_CHAIN (field);
32619 /* Get the appropriate field: __cpu_model.__cpu_features */
32620 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
32621 field, NULL_TREE);
32623 /* Access the 0th element of __cpu_features array. */
32624 array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
32625 integer_zero_node, NULL_TREE, NULL_TREE);
32627 field_val = (1 << isa_names_table[i].feature);
32628 /* Return __cpu_model.__cpu_features[0] & field_val */
32629 final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
32630 build_int_cstu (unsigned_type_node, field_val));
32631 return build1 (CONVERT_EXPR, integer_type_node, final);
32633 gcc_unreachable ();
32636 static tree
32637 ix86_fold_builtin (tree fndecl, int n_args,
32638 tree *args, bool ignore ATTRIBUTE_UNUSED)
32640 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
32642 enum ix86_builtins fn_code = (enum ix86_builtins)
32643 DECL_FUNCTION_CODE (fndecl);
32644 if (fn_code == IX86_BUILTIN_CPU_IS
32645 || fn_code == IX86_BUILTIN_CPU_SUPPORTS)
32647 gcc_assert (n_args == 1);
32648 return fold_builtin_cpu (fndecl, args);
32652 #ifdef SUBTARGET_FOLD_BUILTIN
32653 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
32654 #endif
32656 return NULL_TREE;
32659 /* Make builtins to detect cpu type and features supported. NAME is
32660 the builtin name, CODE is the builtin code, and FTYPE is the function
32661 type of the builtin. */
32663 static void
32664 make_cpu_type_builtin (const char* name, int code,
32665 enum ix86_builtin_func_type ftype, bool is_const)
32667 tree decl;
32668 tree type;
32670 type = ix86_get_builtin_func_type (ftype);
32671 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
32672 NULL, NULL_TREE);
32673 gcc_assert (decl != NULL_TREE);
32674 ix86_builtins[(int) code] = decl;
32675 TREE_READONLY (decl) = is_const;
32678 /* Make builtins to get CPU type and features supported. The created
32679 builtins are:
32681 __builtin_cpu_init (), to detect cpu type and features,
32682 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
32683 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>. */
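/* Example use from user code (illustrative; run_avx2 and run_generic are
   hypothetical helpers):

     __builtin_cpu_init ();
     if (__builtin_cpu_supports ("avx2"))
       run_avx2 ();
     else
       run_generic ();  */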
32686 static void
32687 ix86_init_platform_type_builtins (void)
32689 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
32690 INT_FTYPE_VOID, false);
32691 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
32692 INT_FTYPE_PCCHAR, true);
32693 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
32694 INT_FTYPE_PCCHAR, true);
32697 /* Internal method for ix86_init_builtins. */
32699 static void
32700 ix86_init_builtins_va_builtins_abi (void)
32702 tree ms_va_ref, sysv_va_ref;
32703 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
32704 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
32705 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
32706 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
32708 if (!TARGET_64BIT)
32709 return;
32710 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
32711 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
32712 ms_va_ref = build_reference_type (ms_va_list_type_node);
32713 sysv_va_ref =
32714 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
32716 fnvoid_va_end_ms =
32717 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
32718 fnvoid_va_start_ms =
32719 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
32720 fnvoid_va_end_sysv =
32721 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
32722 fnvoid_va_start_sysv =
32723 build_varargs_function_type_list (void_type_node, sysv_va_ref,
32724 NULL_TREE);
32725 fnvoid_va_copy_ms =
32726 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
32727 NULL_TREE);
32728 fnvoid_va_copy_sysv =
32729 build_function_type_list (void_type_node, sysv_va_ref,
32730 sysv_va_ref, NULL_TREE);
32732 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
32733 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
32734 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
32735 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
32736 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
32737 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
32738 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
32739 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32740 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
32741 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32742 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
32743 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32746 static void
32747 ix86_init_builtin_types (void)
32749 tree float128_type_node, float80_type_node;
32751 /* The __float80 type. */
32752 float80_type_node = long_double_type_node;
32753 if (TYPE_MODE (float80_type_node) != XFmode)
32755 /* long double does not have XFmode here; build a separate 80-bit type. */
32756 float80_type_node = make_node (REAL_TYPE);
32758 TYPE_PRECISION (float80_type_node) = 80;
32759 layout_type (float80_type_node);
32761 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
32763 /* The __float128 type. */
32764 float128_type_node = make_node (REAL_TYPE);
32765 TYPE_PRECISION (float128_type_node) = 128;
32766 layout_type (float128_type_node);
32767 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
32769 /* This macro is built by i386-builtin-types.awk. */
32770 DEFINE_BUILTIN_PRIMITIVE_TYPES;
32773 static void
32774 ix86_init_builtins (void)
32776 tree t;
32778 ix86_init_builtin_types ();
32780 /* Builtins to get CPU type and features. */
32781 ix86_init_platform_type_builtins ();
32783 /* TFmode support builtins. */
32784 def_builtin_const (0, "__builtin_infq",
32785 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
32786 def_builtin_const (0, "__builtin_huge_valq",
32787 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
32789 /* We will expand them to a normal call if SSE isn't available, since
32790 they are used by libgcc. */
32791 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
32792 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
32793 BUILT_IN_MD, "__fabstf2", NULL_TREE);
32794 TREE_READONLY (t) = 1;
32795 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
32797 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
32798 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
32799 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
32800 TREE_READONLY (t) = 1;
32801 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
32803 ix86_init_tm_builtins ();
32804 ix86_init_mmx_sse_builtins ();
32806 if (TARGET_LP64)
32807 ix86_init_builtins_va_builtins_abi ();
32809 #ifdef SUBTARGET_INIT_BUILTINS
32810 SUBTARGET_INIT_BUILTINS;
32811 #endif
32814 /* Return the ix86 builtin for CODE. */
32816 static tree
32817 ix86_builtin_decl (unsigned code, bool)
32819 if (code >= IX86_BUILTIN_MAX)
32820 return error_mark_node;
32822 return ix86_builtins[code];
32825 /* Errors in the source file can cause expand_expr to return const0_rtx
32826 where we expect a vector. To avoid crashing, use one of the vector
32827 clear instructions. */
32828 static rtx
32829 safe_vector_operand (rtx x, enum machine_mode mode)
32831 if (x == const0_rtx)
32832 x = CONST0_RTX (mode);
32833 return x;
32836 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
32838 static rtx
32839 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
32841 rtx pat;
32842 tree arg0 = CALL_EXPR_ARG (exp, 0);
32843 tree arg1 = CALL_EXPR_ARG (exp, 1);
32844 rtx op0 = expand_normal (arg0);
32845 rtx op1 = expand_normal (arg1);
32846 enum machine_mode tmode = insn_data[icode].operand[0].mode;
32847 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
32848 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
32850 if (VECTOR_MODE_P (mode0))
32851 op0 = safe_vector_operand (op0, mode0);
32852 if (VECTOR_MODE_P (mode1))
32853 op1 = safe_vector_operand (op1, mode1);
32855 if (optimize || !target
32856 || GET_MODE (target) != tmode
32857 || !insn_data[icode].operand[0].predicate (target, tmode))
32858 target = gen_reg_rtx (tmode);
32860 if (GET_MODE (op1) == SImode && mode1 == TImode)
32862 rtx x = gen_reg_rtx (V4SImode);
32863 emit_insn (gen_sse2_loadd (x, op1));
32864 op1 = gen_lowpart (TImode, x);
32867 if (!insn_data[icode].operand[1].predicate (op0, mode0))
32868 op0 = copy_to_mode_reg (mode0, op0);
32869 if (!insn_data[icode].operand[2].predicate (op1, mode1))
32870 op1 = copy_to_mode_reg (mode1, op1);
32872 pat = GEN_FCN (icode) (target, op0, op1);
32873 if (! pat)
32874 return 0;
32876 emit_insn (pat);
32878 return target;
32881 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
32883 static rtx
32884 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
32885 enum ix86_builtin_func_type m_type,
32886 enum rtx_code sub_code)
32888 rtx pat;
32889 int i;
32890 int nargs;
32891 bool comparison_p = false;
32892 bool tf_p = false;
32893 bool last_arg_constant = false;
32894 int num_memory = 0;
32895 struct {
32896 rtx op;
32897 enum machine_mode mode;
32898 } args[4];
32900 enum machine_mode tmode = insn_data[icode].operand[0].mode;
32902 switch (m_type)
32904 case MULTI_ARG_4_DF2_DI_I:
32905 case MULTI_ARG_4_DF2_DI_I1:
32906 case MULTI_ARG_4_SF2_SI_I:
32907 case MULTI_ARG_4_SF2_SI_I1:
32908 nargs = 4;
32909 last_arg_constant = true;
32910 break;
32912 case MULTI_ARG_3_SF:
32913 case MULTI_ARG_3_DF:
32914 case MULTI_ARG_3_SF2:
32915 case MULTI_ARG_3_DF2:
32916 case MULTI_ARG_3_DI:
32917 case MULTI_ARG_3_SI:
32918 case MULTI_ARG_3_SI_DI:
32919 case MULTI_ARG_3_HI:
32920 case MULTI_ARG_3_HI_SI:
32921 case MULTI_ARG_3_QI:
32922 case MULTI_ARG_3_DI2:
32923 case MULTI_ARG_3_SI2:
32924 case MULTI_ARG_3_HI2:
32925 case MULTI_ARG_3_QI2:
32926 nargs = 3;
32927 break;
32929 case MULTI_ARG_2_SF:
32930 case MULTI_ARG_2_DF:
32931 case MULTI_ARG_2_DI:
32932 case MULTI_ARG_2_SI:
32933 case MULTI_ARG_2_HI:
32934 case MULTI_ARG_2_QI:
32935 nargs = 2;
32936 break;
32938 case MULTI_ARG_2_DI_IMM:
32939 case MULTI_ARG_2_SI_IMM:
32940 case MULTI_ARG_2_HI_IMM:
32941 case MULTI_ARG_2_QI_IMM:
32942 nargs = 2;
32943 last_arg_constant = true;
32944 break;
32946 case MULTI_ARG_1_SF:
32947 case MULTI_ARG_1_DF:
32948 case MULTI_ARG_1_SF2:
32949 case MULTI_ARG_1_DF2:
32950 case MULTI_ARG_1_DI:
32951 case MULTI_ARG_1_SI:
32952 case MULTI_ARG_1_HI:
32953 case MULTI_ARG_1_QI:
32954 case MULTI_ARG_1_SI_DI:
32955 case MULTI_ARG_1_HI_DI:
32956 case MULTI_ARG_1_HI_SI:
32957 case MULTI_ARG_1_QI_DI:
32958 case MULTI_ARG_1_QI_SI:
32959 case MULTI_ARG_1_QI_HI:
32960 nargs = 1;
32961 break;
32963 case MULTI_ARG_2_DI_CMP:
32964 case MULTI_ARG_2_SI_CMP:
32965 case MULTI_ARG_2_HI_CMP:
32966 case MULTI_ARG_2_QI_CMP:
32967 nargs = 2;
32968 comparison_p = true;
32969 break;
32971 case MULTI_ARG_2_SF_TF:
32972 case MULTI_ARG_2_DF_TF:
32973 case MULTI_ARG_2_DI_TF:
32974 case MULTI_ARG_2_SI_TF:
32975 case MULTI_ARG_2_HI_TF:
32976 case MULTI_ARG_2_QI_TF:
32977 nargs = 2;
32978 tf_p = true;
32979 break;
32981 default:
32982 gcc_unreachable ();
32985 if (optimize || !target
32986 || GET_MODE (target) != tmode
32987 || !insn_data[icode].operand[0].predicate (target, tmode))
32988 target = gen_reg_rtx (tmode);
32990 gcc_assert (nargs <= 4);
32992 for (i = 0; i < nargs; i++)
32994 tree arg = CALL_EXPR_ARG (exp, i);
32995 rtx op = expand_normal (arg);
32996 int adjust = (comparison_p) ? 1 : 0;
32997 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
32999 if (last_arg_constant && i == nargs - 1)
33001 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
33003 enum insn_code new_icode = icode;
33004 switch (icode)
33006 case CODE_FOR_xop_vpermil2v2df3:
33007 case CODE_FOR_xop_vpermil2v4sf3:
33008 case CODE_FOR_xop_vpermil2v4df3:
33009 case CODE_FOR_xop_vpermil2v8sf3:
33010 error ("the last argument must be a 2-bit immediate");
33011 return gen_reg_rtx (tmode);
33012 case CODE_FOR_xop_rotlv2di3:
33013 new_icode = CODE_FOR_rotlv2di3;
33014 goto xop_rotl;
33015 case CODE_FOR_xop_rotlv4si3:
33016 new_icode = CODE_FOR_rotlv4si3;
33017 goto xop_rotl;
33018 case CODE_FOR_xop_rotlv8hi3:
33019 new_icode = CODE_FOR_rotlv8hi3;
33020 goto xop_rotl;
33021 case CODE_FOR_xop_rotlv16qi3:
33022 new_icode = CODE_FOR_rotlv16qi3;
33023 xop_rotl:
33024 if (CONST_INT_P (op))
33026 int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
33027 op = GEN_INT (INTVAL (op) & mask);
33028 gcc_checking_assert
33029 (insn_data[icode].operand[i + 1].predicate (op, mode));
33031 else
33033 gcc_checking_assert
33034 (nargs == 2
33035 && insn_data[new_icode].operand[0].mode == tmode
33036 && insn_data[new_icode].operand[1].mode == tmode
33037 && insn_data[new_icode].operand[2].mode == mode
33038 && insn_data[new_icode].operand[0].predicate
33039 == insn_data[icode].operand[0].predicate
33040 && insn_data[new_icode].operand[1].predicate
33041 == insn_data[icode].operand[1].predicate);
33042 icode = new_icode;
33043 goto non_constant;
33045 break;
33046 default:
33047 gcc_unreachable ();
33051 else
33053 non_constant:
33054 if (VECTOR_MODE_P (mode))
33055 op = safe_vector_operand (op, mode);
33057 /* If we aren't optimizing, only allow one memory operand to be
33058 generated. */
33059 if (memory_operand (op, mode))
33060 num_memory++;
33062 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
33064 if (optimize
33065 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
33066 || num_memory > 1)
33067 op = force_reg (mode, op);
33070 args[i].op = op;
33071 args[i].mode = mode;
33074 switch (nargs)
33076 case 1:
33077 pat = GEN_FCN (icode) (target, args[0].op);
33078 break;
33080 case 2:
33081 if (tf_p)
33082 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
33083 GEN_INT ((int)sub_code));
33084 else if (! comparison_p)
33085 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
33086 else
33088 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
33089 args[0].op,
33090 args[1].op);
33092 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
33094 break;
33096 case 3:
33097 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
33098 break;
33100 case 4:
33101 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
33102 break;
33104 default:
33105 gcc_unreachable ();
33108 if (! pat)
33109 return 0;
33111 emit_insn (pat);
33112 return target;
33115 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
33116 insns with vec_merge. */
33118 static rtx
33119 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
33120 rtx target)
33122 rtx pat;
33123 tree arg0 = CALL_EXPR_ARG (exp, 0);
33124 rtx op1, op0 = expand_normal (arg0);
33125 enum machine_mode tmode = insn_data[icode].operand[0].mode;
33126 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
33128 if (optimize || !target
33129 || GET_MODE (target) != tmode
33130 || !insn_data[icode].operand[0].predicate (target, tmode))
33131 target = gen_reg_rtx (tmode);
33133 if (VECTOR_MODE_P (mode0))
33134 op0 = safe_vector_operand (op0, mode0);
33136 if ((optimize && !register_operand (op0, mode0))
33137 || !insn_data[icode].operand[1].predicate (op0, mode0))
33138 op0 = copy_to_mode_reg (mode0, op0);
33140 op1 = op0;
33141 if (!insn_data[icode].operand[2].predicate (op1, mode0))
33142 op1 = copy_to_mode_reg (mode0, op1);
33144 pat = GEN_FCN (icode) (target, op0, op1);
33145 if (! pat)
33146 return 0;
33147 emit_insn (pat);
33148 return target;
33151 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
33153 static rtx
33154 ix86_expand_sse_compare (const struct builtin_description *d,
33155 tree exp, rtx target, bool swap)
33157 rtx pat;
33158 tree arg0 = CALL_EXPR_ARG (exp, 0);
33159 tree arg1 = CALL_EXPR_ARG (exp, 1);
33160 rtx op0 = expand_normal (arg0);
33161 rtx op1 = expand_normal (arg1);
33162 rtx op2;
33163 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33164 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33165 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
33166 enum rtx_code comparison = d->comparison;
33168 if (VECTOR_MODE_P (mode0))
33169 op0 = safe_vector_operand (op0, mode0);
33170 if (VECTOR_MODE_P (mode1))
33171 op1 = safe_vector_operand (op1, mode1);
33173 /* Swap operands if we have a comparison that isn't available in
33174 hardware. */
33175 if (swap)
33177 rtx tmp = gen_reg_rtx (mode1);
33178 emit_move_insn (tmp, op1);
33179 op1 = op0;
33180 op0 = tmp;
33183 if (optimize || !target
33184 || GET_MODE (target) != tmode
33185 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33186 target = gen_reg_rtx (tmode);
33188 if ((optimize && !register_operand (op0, mode0))
33189 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
33190 op0 = copy_to_mode_reg (mode0, op0);
33191 if ((optimize && !register_operand (op1, mode1))
33192 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
33193 op1 = copy_to_mode_reg (mode1, op1);
33195 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
33196 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
33197 if (! pat)
33198 return 0;
33199 emit_insn (pat);
33200 return target;
33203 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
33205 static rtx
33206 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
33207 rtx target)
33209 rtx pat;
33210 tree arg0 = CALL_EXPR_ARG (exp, 0);
33211 tree arg1 = CALL_EXPR_ARG (exp, 1);
33212 rtx op0 = expand_normal (arg0);
33213 rtx op1 = expand_normal (arg1);
33214 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
33215 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
33216 enum rtx_code comparison = d->comparison;
33218 if (VECTOR_MODE_P (mode0))
33219 op0 = safe_vector_operand (op0, mode0);
33220 if (VECTOR_MODE_P (mode1))
33221 op1 = safe_vector_operand (op1, mode1);
33223 /* Swap operands if we have a comparison that isn't available in
33224 hardware. */
33225 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
33227 rtx tmp = op1;
33228 op1 = op0;
33229 op0 = tmp;
33232 target = gen_reg_rtx (SImode);
33233 emit_move_insn (target, const0_rtx);
33234 target = gen_rtx_SUBREG (QImode, target, 0);
33236 if ((optimize && !register_operand (op0, mode0))
33237 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33238 op0 = copy_to_mode_reg (mode0, op0);
33239 if ((optimize && !register_operand (op1, mode1))
33240 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33241 op1 = copy_to_mode_reg (mode1, op1);
33243 pat = GEN_FCN (d->icode) (op0, op1);
33244 if (! pat)
33245 return 0;
33246 emit_insn (pat);
33247 emit_insn (gen_rtx_SET (VOIDmode,
33248 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33249 gen_rtx_fmt_ee (comparison, QImode,
33250 SET_DEST (pat),
33251 const0_rtx)));
33253 return SUBREG_REG (target);
33256 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
33258 static rtx
33259 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
33260 rtx target)
33262 rtx pat;
33263 tree arg0 = CALL_EXPR_ARG (exp, 0);
33264 rtx op1, op0 = expand_normal (arg0);
33265 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33266 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33268 if (optimize || target == 0
33269 || GET_MODE (target) != tmode
33270 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33271 target = gen_reg_rtx (tmode);
33273 if (VECTOR_MODE_P (mode0))
33274 op0 = safe_vector_operand (op0, mode0);
33276 if ((optimize && !register_operand (op0, mode0))
33277 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33278 op0 = copy_to_mode_reg (mode0, op0);
33280 op1 = GEN_INT (d->comparison);
33282 pat = GEN_FCN (d->icode) (target, op0, op1);
33283 if (! pat)
33284 return 0;
33285 emit_insn (pat);
33286 return target;
33289 static rtx
33290 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
33291 tree exp, rtx target)
33293 rtx pat;
33294 tree arg0 = CALL_EXPR_ARG (exp, 0);
33295 tree arg1 = CALL_EXPR_ARG (exp, 1);
33296 rtx op0 = expand_normal (arg0);
33297 rtx op1 = expand_normal (arg1);
33298 rtx op2;
33299 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33300 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33301 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
33303 if (optimize || target == 0
33304 || GET_MODE (target) != tmode
33305 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33306 target = gen_reg_rtx (tmode);
33308 op0 = safe_vector_operand (op0, mode0);
33309 op1 = safe_vector_operand (op1, mode1);
33311 if ((optimize && !register_operand (op0, mode0))
33312 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33313 op0 = copy_to_mode_reg (mode0, op0);
33314 if ((optimize && !register_operand (op1, mode1))
33315 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33316 op1 = copy_to_mode_reg (mode1, op1);
33318 op2 = GEN_INT (d->comparison);
33320 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
33321 if (! pat)
33322 return 0;
33323 emit_insn (pat);
33324 return target;
33327 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
33329 static rtx
33330 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
33331 rtx target)
33333 rtx pat;
33334 tree arg0 = CALL_EXPR_ARG (exp, 0);
33335 tree arg1 = CALL_EXPR_ARG (exp, 1);
33336 rtx op0 = expand_normal (arg0);
33337 rtx op1 = expand_normal (arg1);
33338 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
33339 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
33340 enum rtx_code comparison = d->comparison;
33342 if (VECTOR_MODE_P (mode0))
33343 op0 = safe_vector_operand (op0, mode0);
33344 if (VECTOR_MODE_P (mode1))
33345 op1 = safe_vector_operand (op1, mode1);
33347 target = gen_reg_rtx (SImode);
33348 emit_move_insn (target, const0_rtx);
33349 target = gen_rtx_SUBREG (QImode, target, 0);
33351 if ((optimize && !register_operand (op0, mode0))
33352 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33353 op0 = copy_to_mode_reg (mode0, op0);
33354 if ((optimize && !register_operand (op1, mode1))
33355 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33356 op1 = copy_to_mode_reg (mode1, op1);
33358 pat = GEN_FCN (d->icode) (op0, op1);
33359 if (! pat)
33360 return 0;
33361 emit_insn (pat);
33362 emit_insn (gen_rtx_SET (VOIDmode,
33363 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33364 gen_rtx_fmt_ee (comparison, QImode,
33365 SET_DEST (pat),
33366 const0_rtx)));
33368 return SUBREG_REG (target);
33371 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
33373 static rtx
33374 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
33375 tree exp, rtx target)
33377 rtx pat;
33378 tree arg0 = CALL_EXPR_ARG (exp, 0);
33379 tree arg1 = CALL_EXPR_ARG (exp, 1);
33380 tree arg2 = CALL_EXPR_ARG (exp, 2);
33381 tree arg3 = CALL_EXPR_ARG (exp, 3);
33382 tree arg4 = CALL_EXPR_ARG (exp, 4);
33383 rtx scratch0, scratch1;
33384 rtx op0 = expand_normal (arg0);
33385 rtx op1 = expand_normal (arg1);
33386 rtx op2 = expand_normal (arg2);
33387 rtx op3 = expand_normal (arg3);
33388 rtx op4 = expand_normal (arg4);
33389 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
33391 tmode0 = insn_data[d->icode].operand[0].mode;
33392 tmode1 = insn_data[d->icode].operand[1].mode;
33393 modev2 = insn_data[d->icode].operand[2].mode;
33394 modei3 = insn_data[d->icode].operand[3].mode;
33395 modev4 = insn_data[d->icode].operand[4].mode;
33396 modei5 = insn_data[d->icode].operand[5].mode;
33397 modeimm = insn_data[d->icode].operand[6].mode;
33399 if (VECTOR_MODE_P (modev2))
33400 op0 = safe_vector_operand (op0, modev2);
33401 if (VECTOR_MODE_P (modev4))
33402 op2 = safe_vector_operand (op2, modev4);
33404 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
33405 op0 = copy_to_mode_reg (modev2, op0);
33406 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
33407 op1 = copy_to_mode_reg (modei3, op1);
33408 if ((optimize && !register_operand (op2, modev4))
33409 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
33410 op2 = copy_to_mode_reg (modev4, op2);
33411 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
33412 op3 = copy_to_mode_reg (modei5, op3);
33414 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
33416 error ("the fifth argument must be an 8-bit immediate");
33417 return const0_rtx;
33420 if (d->code == IX86_BUILTIN_PCMPESTRI128)
33422 if (optimize || !target
33423 || GET_MODE (target) != tmode0
33424 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
33425 target = gen_reg_rtx (tmode0);
33427 scratch1 = gen_reg_rtx (tmode1);
33429 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
33431 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
33433 if (optimize || !target
33434 || GET_MODE (target) != tmode1
33435 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
33436 target = gen_reg_rtx (tmode1);
33438 scratch0 = gen_reg_rtx (tmode0);
33440 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
33442 else
33444 gcc_assert (d->flag);
33446 scratch0 = gen_reg_rtx (tmode0);
33447 scratch1 = gen_reg_rtx (tmode1);
33449 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
33452 if (! pat)
33453 return 0;
33455 emit_insn (pat);
33457 if (d->flag)
33459 target = gen_reg_rtx (SImode);
33460 emit_move_insn (target, const0_rtx);
33461 target = gen_rtx_SUBREG (QImode, target, 0);
33463 emit_insn
33464 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33465 gen_rtx_fmt_ee (EQ, QImode,
33466 gen_rtx_REG ((enum machine_mode) d->flag,
33467 FLAGS_REG),
33468 const0_rtx)));
33469 return SUBREG_REG (target);
33471 else
33472 return target;
33476 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
33478 static rtx
33479 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
33480 tree exp, rtx target)
33482 rtx pat;
33483 tree arg0 = CALL_EXPR_ARG (exp, 0);
33484 tree arg1 = CALL_EXPR_ARG (exp, 1);
33485 tree arg2 = CALL_EXPR_ARG (exp, 2);
33486 rtx scratch0, scratch1;
33487 rtx op0 = expand_normal (arg0);
33488 rtx op1 = expand_normal (arg1);
33489 rtx op2 = expand_normal (arg2);
33490 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
33492 tmode0 = insn_data[d->icode].operand[0].mode;
33493 tmode1 = insn_data[d->icode].operand[1].mode;
33494 modev2 = insn_data[d->icode].operand[2].mode;
33495 modev3 = insn_data[d->icode].operand[3].mode;
33496 modeimm = insn_data[d->icode].operand[4].mode;
33498 if (VECTOR_MODE_P (modev2))
33499 op0 = safe_vector_operand (op0, modev2);
33500 if (VECTOR_MODE_P (modev3))
33501 op1 = safe_vector_operand (op1, modev3);
33503 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
33504 op0 = copy_to_mode_reg (modev2, op0);
33505 if ((optimize && !register_operand (op1, modev3))
33506 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
33507 op1 = copy_to_mode_reg (modev3, op1);
33509 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
33511 error ("the third argument must be an 8-bit immediate");
33512 return const0_rtx;
33515 if (d->code == IX86_BUILTIN_PCMPISTRI128)
33517 if (optimize || !target
33518 || GET_MODE (target) != tmode0
33519 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
33520 target = gen_reg_rtx (tmode0);
33522 scratch1 = gen_reg_rtx (tmode1);
33524 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
33526 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
33528 if (optimize || !target
33529 || GET_MODE (target) != tmode1
33530 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
33531 target = gen_reg_rtx (tmode1);
33533 scratch0 = gen_reg_rtx (tmode0);
33535 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
33537 else
33539 gcc_assert (d->flag);
33541 scratch0 = gen_reg_rtx (tmode0);
33542 scratch1 = gen_reg_rtx (tmode1);
33544 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
33547 if (! pat)
33548 return 0;
33550 emit_insn (pat);
33552 if (d->flag)
33554 target = gen_reg_rtx (SImode);
33555 emit_move_insn (target, const0_rtx);
33556 target = gen_rtx_SUBREG (QImode, target, 0);
33558 emit_insn
33559 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33560 gen_rtx_fmt_ee (EQ, QImode,
33561 gen_rtx_REG ((enum machine_mode) d->flag,
33562 FLAGS_REG),
33563 const0_rtx)));
33564 return SUBREG_REG (target);
33566 else
33567 return target;
33570 /* Subroutine of ix86_expand_builtin to take care of insns with
33571 variable number of operands. */
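/* The ix86_builtin_func_type names below encode the signature: for example
   (illustrative) V4SF_FTYPE_V4SF_V4SF is a builtin returning V4SF with two
   V4SF arguments, while trailing markers such as _ROUND, _PTEST, _SWAP,
   _COUNT and _CONVERT select the special handling in this switch.  */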
33573 static rtx
33574 ix86_expand_args_builtin (const struct builtin_description *d,
33575 tree exp, rtx target)
33577 rtx pat, real_target;
33578 unsigned int i, nargs;
33579 unsigned int nargs_constant = 0;
33580 unsigned int mask_pos = 0;
33581 int num_memory = 0;
33582 struct
33584 rtx op;
33585 enum machine_mode mode;
33586 } args[6];
33587 bool last_arg_count = false;
33588 enum insn_code icode = d->icode;
33589 const struct insn_data_d *insn_p = &insn_data[icode];
33590 enum machine_mode tmode = insn_p->operand[0].mode;
33591 enum machine_mode rmode = VOIDmode;
33592 bool swap = false;
33593 enum rtx_code comparison = d->comparison;
33595 switch ((enum ix86_builtin_func_type) d->flag)
33597 case V2DF_FTYPE_V2DF_ROUND:
33598 case V4DF_FTYPE_V4DF_ROUND:
33599 case V4SF_FTYPE_V4SF_ROUND:
33600 case V8SF_FTYPE_V8SF_ROUND:
33601 case V4SI_FTYPE_V4SF_ROUND:
33602 case V8SI_FTYPE_V8SF_ROUND:
33603 return ix86_expand_sse_round (d, exp, target);
33604 case V4SI_FTYPE_V2DF_V2DF_ROUND:
33605 case V8SI_FTYPE_V4DF_V4DF_ROUND:
33606 case V16SI_FTYPE_V8DF_V8DF_ROUND:
33607 return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
33608 case INT_FTYPE_V8SF_V8SF_PTEST:
33609 case INT_FTYPE_V4DI_V4DI_PTEST:
33610 case INT_FTYPE_V4DF_V4DF_PTEST:
33611 case INT_FTYPE_V4SF_V4SF_PTEST:
33612 case INT_FTYPE_V2DI_V2DI_PTEST:
33613 case INT_FTYPE_V2DF_V2DF_PTEST:
33614 return ix86_expand_sse_ptest (d, exp, target);
33615 case FLOAT128_FTYPE_FLOAT128:
33616 case FLOAT_FTYPE_FLOAT:
33617 case INT_FTYPE_INT:
33618 case UINT64_FTYPE_INT:
33619 case UINT16_FTYPE_UINT16:
33620 case INT64_FTYPE_INT64:
33621 case INT64_FTYPE_V4SF:
33622 case INT64_FTYPE_V2DF:
33623 case INT_FTYPE_V16QI:
33624 case INT_FTYPE_V8QI:
33625 case INT_FTYPE_V8SF:
33626 case INT_FTYPE_V4DF:
33627 case INT_FTYPE_V4SF:
33628 case INT_FTYPE_V2DF:
33629 case INT_FTYPE_V32QI:
33630 case V16QI_FTYPE_V16QI:
33631 case V8SI_FTYPE_V8SF:
33632 case V8SI_FTYPE_V4SI:
33633 case V8HI_FTYPE_V8HI:
33634 case V8HI_FTYPE_V16QI:
33635 case V8QI_FTYPE_V8QI:
33636 case V8SF_FTYPE_V8SF:
33637 case V8SF_FTYPE_V8SI:
33638 case V8SF_FTYPE_V4SF:
33639 case V8SF_FTYPE_V8HI:
33640 case V4SI_FTYPE_V4SI:
33641 case V4SI_FTYPE_V16QI:
33642 case V4SI_FTYPE_V4SF:
33643 case V4SI_FTYPE_V8SI:
33644 case V4SI_FTYPE_V8HI:
33645 case V4SI_FTYPE_V4DF:
33646 case V4SI_FTYPE_V2DF:
33647 case V4HI_FTYPE_V4HI:
33648 case V4DF_FTYPE_V4DF:
33649 case V4DF_FTYPE_V4SI:
33650 case V4DF_FTYPE_V4SF:
33651 case V4DF_FTYPE_V2DF:
33652 case V4SF_FTYPE_V4SF:
33653 case V4SF_FTYPE_V4SI:
33654 case V4SF_FTYPE_V8SF:
33655 case V4SF_FTYPE_V4DF:
33656 case V4SF_FTYPE_V8HI:
33657 case V4SF_FTYPE_V2DF:
33658 case V2DI_FTYPE_V2DI:
33659 case V2DI_FTYPE_V16QI:
33660 case V2DI_FTYPE_V8HI:
33661 case V2DI_FTYPE_V4SI:
33662 case V2DF_FTYPE_V2DF:
33663 case V2DF_FTYPE_V4SI:
33664 case V2DF_FTYPE_V4DF:
33665 case V2DF_FTYPE_V4SF:
33666 case V2DF_FTYPE_V2SI:
33667 case V2SI_FTYPE_V2SI:
33668 case V2SI_FTYPE_V4SF:
33669 case V2SI_FTYPE_V2SF:
33670 case V2SI_FTYPE_V2DF:
33671 case V2SF_FTYPE_V2SF:
33672 case V2SF_FTYPE_V2SI:
33673 case V32QI_FTYPE_V32QI:
33674 case V32QI_FTYPE_V16QI:
33675 case V16HI_FTYPE_V16HI:
33676 case V16HI_FTYPE_V8HI:
33677 case V8SI_FTYPE_V8SI:
33678 case V16HI_FTYPE_V16QI:
33679 case V8SI_FTYPE_V16QI:
33680 case V4DI_FTYPE_V16QI:
33681 case V8SI_FTYPE_V8HI:
33682 case V4DI_FTYPE_V8HI:
33683 case V4DI_FTYPE_V4SI:
33684 case V4DI_FTYPE_V2DI:
33685 case HI_FTYPE_HI:
33686 case UINT_FTYPE_V2DF:
33687 case UINT_FTYPE_V4SF:
33688 case UINT64_FTYPE_V2DF:
33689 case UINT64_FTYPE_V4SF:
33690 case V16QI_FTYPE_V8DI:
33691 case V16HI_FTYPE_V16SI:
33692 case V16SI_FTYPE_HI:
33693 case V16SI_FTYPE_V16SI:
33694 case V16SI_FTYPE_INT:
33695 case V16SF_FTYPE_FLOAT:
33696 case V16SF_FTYPE_V8SF:
33697 case V16SI_FTYPE_V8SI:
33698 case V16SF_FTYPE_V4SF:
33699 case V16SI_FTYPE_V4SI:
33700 case V16SF_FTYPE_V16SF:
33701 case V8HI_FTYPE_V8DI:
33702 case V8UHI_FTYPE_V8UHI:
33703 case V8SI_FTYPE_V8DI:
33704 case V8USI_FTYPE_V8USI:
33705 case V8SF_FTYPE_V8DF:
33706 case V8DI_FTYPE_QI:
33707 case V8DI_FTYPE_INT64:
33708 case V8DI_FTYPE_V4DI:
33709 case V8DI_FTYPE_V8DI:
33710 case V8DF_FTYPE_DOUBLE:
33711 case V8DF_FTYPE_V4DF:
33712 case V8DF_FTYPE_V2DF:
33713 case V8DF_FTYPE_V8DF:
33714 case V8DF_FTYPE_V8SI:
33715 nargs = 1;
33716 break;
33717 case V4SF_FTYPE_V4SF_VEC_MERGE:
33718 case V2DF_FTYPE_V2DF_VEC_MERGE:
33719 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
33720 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
33721 case V16QI_FTYPE_V16QI_V16QI:
33722 case V16QI_FTYPE_V8HI_V8HI:
33723 case V16SI_FTYPE_V16SI_V16SI:
33724 case V16SF_FTYPE_V16SF_V16SF:
33725 case V16SF_FTYPE_V16SF_V16SI:
33726 case V8QI_FTYPE_V8QI_V8QI:
33727 case V8QI_FTYPE_V4HI_V4HI:
33728 case V8HI_FTYPE_V8HI_V8HI:
33729 case V8HI_FTYPE_V16QI_V16QI:
33730 case V8HI_FTYPE_V4SI_V4SI:
33731 case V8SF_FTYPE_V8SF_V8SF:
33732 case V8SF_FTYPE_V8SF_V8SI:
33733 case V8DI_FTYPE_V8DI_V8DI:
33734 case V8DF_FTYPE_V8DF_V8DF:
33735 case V8DF_FTYPE_V8DF_V8DI:
33736 case V4SI_FTYPE_V4SI_V4SI:
33737 case V4SI_FTYPE_V8HI_V8HI:
33738 case V4SI_FTYPE_V4SF_V4SF:
33739 case V4SI_FTYPE_V2DF_V2DF:
33740 case V4HI_FTYPE_V4HI_V4HI:
33741 case V4HI_FTYPE_V8QI_V8QI:
33742 case V4HI_FTYPE_V2SI_V2SI:
33743 case V4DF_FTYPE_V4DF_V4DF:
33744 case V4DF_FTYPE_V4DF_V4DI:
33745 case V4SF_FTYPE_V4SF_V4SF:
33746 case V4SF_FTYPE_V4SF_V4SI:
33747 case V4SF_FTYPE_V4SF_V2SI:
33748 case V4SF_FTYPE_V4SF_V2DF:
33749 case V4SF_FTYPE_V4SF_UINT:
33750 case V4SF_FTYPE_V4SF_UINT64:
33751 case V4SF_FTYPE_V4SF_DI:
33752 case V4SF_FTYPE_V4SF_SI:
33753 case V2DI_FTYPE_V2DI_V2DI:
33754 case V2DI_FTYPE_V16QI_V16QI:
33755 case V2DI_FTYPE_V4SI_V4SI:
33756 case V2UDI_FTYPE_V4USI_V4USI:
33757 case V2DI_FTYPE_V2DI_V16QI:
33758 case V2DI_FTYPE_V2DF_V2DF:
33759 case V2SI_FTYPE_V2SI_V2SI:
33760 case V2SI_FTYPE_V4HI_V4HI:
33761 case V2SI_FTYPE_V2SF_V2SF:
33762 case V2DF_FTYPE_V2DF_V2DF:
33763 case V2DF_FTYPE_V2DF_V4SF:
33764 case V2DF_FTYPE_V2DF_V2DI:
33765 case V2DF_FTYPE_V2DF_DI:
33766 case V2DF_FTYPE_V2DF_SI:
33767 case V2DF_FTYPE_V2DF_UINT:
33768 case V2DF_FTYPE_V2DF_UINT64:
33769 case V2SF_FTYPE_V2SF_V2SF:
33770 case V1DI_FTYPE_V1DI_V1DI:
33771 case V1DI_FTYPE_V8QI_V8QI:
33772 case V1DI_FTYPE_V2SI_V2SI:
33773 case V32QI_FTYPE_V16HI_V16HI:
33774 case V16HI_FTYPE_V8SI_V8SI:
33775 case V32QI_FTYPE_V32QI_V32QI:
33776 case V16HI_FTYPE_V32QI_V32QI:
33777 case V16HI_FTYPE_V16HI_V16HI:
33778 case V8SI_FTYPE_V4DF_V4DF:
33779 case V8SI_FTYPE_V8SI_V8SI:
33780 case V8SI_FTYPE_V16HI_V16HI:
33781 case V4DI_FTYPE_V4DI_V4DI:
33782 case V4DI_FTYPE_V8SI_V8SI:
33783 case V4UDI_FTYPE_V8USI_V8USI:
33784 case QI_FTYPE_V8DI_V8DI:
33785 case HI_FTYPE_V16SI_V16SI:
33786 if (comparison == UNKNOWN)
33787 return ix86_expand_binop_builtin (icode, exp, target);
33788 nargs = 2;
33789 break;
33790 case V4SF_FTYPE_V4SF_V4SF_SWAP:
33791 case V2DF_FTYPE_V2DF_V2DF_SWAP:
33792 gcc_assert (comparison != UNKNOWN);
33793 nargs = 2;
33794 swap = true;
33795 break;
33796 case V16HI_FTYPE_V16HI_V8HI_COUNT:
33797 case V16HI_FTYPE_V16HI_SI_COUNT:
33798 case V8SI_FTYPE_V8SI_V4SI_COUNT:
33799 case V8SI_FTYPE_V8SI_SI_COUNT:
33800 case V4DI_FTYPE_V4DI_V2DI_COUNT:
33801 case V4DI_FTYPE_V4DI_INT_COUNT:
33802 case V8HI_FTYPE_V8HI_V8HI_COUNT:
33803 case V8HI_FTYPE_V8HI_SI_COUNT:
33804 case V4SI_FTYPE_V4SI_V4SI_COUNT:
33805 case V4SI_FTYPE_V4SI_SI_COUNT:
33806 case V4HI_FTYPE_V4HI_V4HI_COUNT:
33807 case V4HI_FTYPE_V4HI_SI_COUNT:
33808 case V2DI_FTYPE_V2DI_V2DI_COUNT:
33809 case V2DI_FTYPE_V2DI_SI_COUNT:
33810 case V2SI_FTYPE_V2SI_V2SI_COUNT:
33811 case V2SI_FTYPE_V2SI_SI_COUNT:
33812 case V1DI_FTYPE_V1DI_V1DI_COUNT:
33813 case V1DI_FTYPE_V1DI_SI_COUNT:
33814 nargs = 2;
33815 last_arg_count = true;
33816 break;
33817 case UINT64_FTYPE_UINT64_UINT64:
33818 case UINT_FTYPE_UINT_UINT:
33819 case UINT_FTYPE_UINT_USHORT:
33820 case UINT_FTYPE_UINT_UCHAR:
33821 case UINT16_FTYPE_UINT16_INT:
33822 case UINT8_FTYPE_UINT8_INT:
33823 case HI_FTYPE_HI_HI:
33824 case V16SI_FTYPE_V8DF_V8DF:
33825 nargs = 2;
33826 break;
33827 case V2DI_FTYPE_V2DI_INT_CONVERT:
33828 nargs = 2;
33829 rmode = V1TImode;
33830 nargs_constant = 1;
33831 break;
33832 case V4DI_FTYPE_V4DI_INT_CONVERT:
33833 nargs = 2;
33834 rmode = V2TImode;
33835 nargs_constant = 1;
33836 break;
33837 case V8HI_FTYPE_V8HI_INT:
33838 case V8HI_FTYPE_V8SF_INT:
33839 case V16HI_FTYPE_V16SF_INT:
33840 case V8HI_FTYPE_V4SF_INT:
33841 case V8SF_FTYPE_V8SF_INT:
33842 case V4SF_FTYPE_V16SF_INT:
33843 case V16SF_FTYPE_V16SF_INT:
33844 case V4SI_FTYPE_V4SI_INT:
33845 case V4SI_FTYPE_V8SI_INT:
33846 case V4HI_FTYPE_V4HI_INT:
33847 case V4DF_FTYPE_V4DF_INT:
33848 case V4DF_FTYPE_V8DF_INT:
33849 case V4SF_FTYPE_V4SF_INT:
33850 case V4SF_FTYPE_V8SF_INT:
33851 case V2DI_FTYPE_V2DI_INT:
33852 case V2DF_FTYPE_V2DF_INT:
33853 case V2DF_FTYPE_V4DF_INT:
33854 case V16HI_FTYPE_V16HI_INT:
33855 case V8SI_FTYPE_V8SI_INT:
33856 case V16SI_FTYPE_V16SI_INT:
33857 case V4SI_FTYPE_V16SI_INT:
33858 case V4DI_FTYPE_V4DI_INT:
33859 case V2DI_FTYPE_V4DI_INT:
33860 case V4DI_FTYPE_V8DI_INT:
33861 case HI_FTYPE_HI_INT:
33862 nargs = 2;
33863 nargs_constant = 1;
33864 break;
33865 case V16QI_FTYPE_V16QI_V16QI_V16QI:
33866 case V8SF_FTYPE_V8SF_V8SF_V8SF:
33867 case V4DF_FTYPE_V4DF_V4DF_V4DF:
33868 case V4SF_FTYPE_V4SF_V4SF_V4SF:
33869 case V2DF_FTYPE_V2DF_V2DF_V2DF:
33870 case V32QI_FTYPE_V32QI_V32QI_V32QI:
33871 case HI_FTYPE_V16SI_V16SI_HI:
33872 case QI_FTYPE_V8DI_V8DI_QI:
33873 case V16HI_FTYPE_V16SI_V16HI_HI:
33874 case V16QI_FTYPE_V16SI_V16QI_HI:
33875 case V16QI_FTYPE_V8DI_V16QI_QI:
33876 case V16SF_FTYPE_V16SF_V16SF_HI:
33877 case V16SF_FTYPE_V16SF_V16SF_V16SF:
33878 case V16SF_FTYPE_V16SF_V16SI_V16SF:
33879 case V16SF_FTYPE_V16SI_V16SF_HI:
33880 case V16SF_FTYPE_V16SI_V16SF_V16SF:
33881 case V16SF_FTYPE_V4SF_V16SF_HI:
33882 case V16SI_FTYPE_SI_V16SI_HI:
33883 case V16SI_FTYPE_V16HI_V16SI_HI:
33884 case V16SI_FTYPE_V16QI_V16SI_HI:
33885 case V16SI_FTYPE_V16SF_V16SI_HI:
33886 case V16SI_FTYPE_V16SI_V16SI_HI:
33887 case V16SI_FTYPE_V16SI_V16SI_V16SI:
33888 case V16SI_FTYPE_V4SI_V16SI_HI:
33889 case V2DI_FTYPE_V2DI_V2DI_V2DI:
33890 case V4DI_FTYPE_V4DI_V4DI_V4DI:
33891 case V8DF_FTYPE_V2DF_V8DF_QI:
33892 case V8DF_FTYPE_V4DF_V8DF_QI:
33893 case V8DF_FTYPE_V8DF_V8DF_QI:
33894 case V8DF_FTYPE_V8DF_V8DF_V8DF:
33895 case V8DF_FTYPE_V8DF_V8DI_V8DF:
33896 case V8DF_FTYPE_V8DI_V8DF_V8DF:
33897 case V8DF_FTYPE_V8SF_V8DF_QI:
33898 case V8DF_FTYPE_V8SI_V8DF_QI:
33899 case V8DI_FTYPE_DI_V8DI_QI:
33900 case V8DI_FTYPE_V16QI_V8DI_QI:
33901 case V8DI_FTYPE_V2DI_V8DI_QI:
33902 case V8DI_FTYPE_V4DI_V8DI_QI:
33903 case V8DI_FTYPE_V8DI_V8DI_QI:
33904 case V8DI_FTYPE_V8DI_V8DI_V8DI:
33905 case V8DI_FTYPE_V8HI_V8DI_QI:
33906 case V8DI_FTYPE_V8SI_V8DI_QI:
33907 case V8HI_FTYPE_V8DI_V8HI_QI:
33908 case V8SF_FTYPE_V8DF_V8SF_QI:
33909 case V8SI_FTYPE_V8DF_V8SI_QI:
33910 case V8SI_FTYPE_V8DI_V8SI_QI:
33911 case V4SI_FTYPE_V4SI_V4SI_V4SI:
33912 nargs = 3;
33913 break;
33914 case V32QI_FTYPE_V32QI_V32QI_INT:
33915 case V16HI_FTYPE_V16HI_V16HI_INT:
33916 case V16QI_FTYPE_V16QI_V16QI_INT:
33917 case V4DI_FTYPE_V4DI_V4DI_INT:
33918 case V8HI_FTYPE_V8HI_V8HI_INT:
33919 case V8SI_FTYPE_V8SI_V8SI_INT:
33920 case V8SI_FTYPE_V8SI_V4SI_INT:
33921 case V8SF_FTYPE_V8SF_V8SF_INT:
33922 case V8SF_FTYPE_V8SF_V4SF_INT:
33923 case V4SI_FTYPE_V4SI_V4SI_INT:
33924 case V4DF_FTYPE_V4DF_V4DF_INT:
33925 case V16SF_FTYPE_V16SF_V16SF_INT:
33926 case V16SF_FTYPE_V16SF_V4SF_INT:
33927 case V16SI_FTYPE_V16SI_V4SI_INT:
33928 case V4DF_FTYPE_V4DF_V2DF_INT:
33929 case V4SF_FTYPE_V4SF_V4SF_INT:
33930 case V2DI_FTYPE_V2DI_V2DI_INT:
33931 case V4DI_FTYPE_V4DI_V2DI_INT:
33932 case V2DF_FTYPE_V2DF_V2DF_INT:
33933 case QI_FTYPE_V8DI_V8DI_INT:
33934 case QI_FTYPE_V8DF_V8DF_INT:
33935 case QI_FTYPE_V2DF_V2DF_INT:
33936 case QI_FTYPE_V4SF_V4SF_INT:
33937 case HI_FTYPE_V16SI_V16SI_INT:
33938 case HI_FTYPE_V16SF_V16SF_INT:
33939 nargs = 3;
33940 nargs_constant = 1;
33941 break;
33942 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
33943 nargs = 3;
33944 rmode = V4DImode;
33945 nargs_constant = 1;
33946 break;
33947 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
33948 nargs = 3;
33949 rmode = V2DImode;
33950 nargs_constant = 1;
33951 break;
33952 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
33953 nargs = 3;
33954 rmode = DImode;
33955 nargs_constant = 1;
33956 break;
33957 case V2DI_FTYPE_V2DI_UINT_UINT:
33958 nargs = 3;
33959 nargs_constant = 2;
33960 break;
33961 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI:
33962 case V16SF_FTYPE_V16SF_V16SI_V16SF_HI:
33963 case V16SF_FTYPE_V16SI_V16SF_V16SF_HI:
33964 case V16SI_FTYPE_V16SI_V16SI_V16SI_HI:
33965 case V16SI_FTYPE_V16SI_V4SI_V16SI_HI:
33966 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI:
33967 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI:
33968 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI:
33969 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI:
33970 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI:
33971 case V8DF_FTYPE_V8DF_V8DI_V8DF_QI:
33972 case V8DF_FTYPE_V8DI_V8DF_V8DF_QI:
33973 case V8DI_FTYPE_V16SI_V16SI_V8DI_QI:
33974 case V8DI_FTYPE_V8DI_SI_V8DI_V8DI:
33975 case V8DI_FTYPE_V8DI_V2DI_V8DI_QI:
33976 case V8DI_FTYPE_V8DI_V8DI_V8DI_QI:
33977 nargs = 4;
33978 break;
33979 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
33980 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
33981 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
33982 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
33983 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT:
33984 nargs = 4;
33985 nargs_constant = 1;
33986 break;
33987 case QI_FTYPE_V2DF_V2DF_INT_QI:
33988 case QI_FTYPE_V4SF_V4SF_INT_QI:
33989 nargs = 4;
33990 mask_pos = 1;
33991 nargs_constant = 1;
33992 break;
33993 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
33994 nargs = 4;
33995 nargs_constant = 2;
33996 break;
33997 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
33998 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
33999 nargs = 4;
34000 break;
34001 case QI_FTYPE_V8DI_V8DI_INT_QI:
34002 case HI_FTYPE_V16SI_V16SI_INT_HI:
34003 case QI_FTYPE_V8DF_V8DF_INT_QI:
34004 case HI_FTYPE_V16SF_V16SF_INT_HI:
34005 mask_pos = 1;
34006 nargs = 4;
34007 nargs_constant = 1;
34008 break;
34009 case V8DF_FTYPE_V8DF_INT_V8DF_QI:
34010 case V16SF_FTYPE_V16SF_INT_V16SF_HI:
34011 case V16HI_FTYPE_V16SF_INT_V16HI_HI:
34012 case V16SI_FTYPE_V16SI_INT_V16SI_HI:
34013 case V4SI_FTYPE_V16SI_INT_V4SI_QI:
34014 case V4DI_FTYPE_V8DI_INT_V4DI_QI:
34015 case V4DF_FTYPE_V8DF_INT_V4DF_QI:
34016 case V4SF_FTYPE_V16SF_INT_V4SF_QI:
34017 case V8DI_FTYPE_V8DI_INT_V8DI_QI:
34018 nargs = 4;
34019 mask_pos = 2;
34020 nargs_constant = 1;
34021 break;
34022 case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI:
34023 case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI:
34024 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI:
34025 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI:
34026 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI:
34027 case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI:
34028 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI:
34029 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI:
34030 case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI:
34031 case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI:
34032 nargs = 5;
34033 mask_pos = 2;
34034 nargs_constant = 1;
34035 break;
34036 case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI:
34037 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI:
34038 case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI:
34039 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI:
34040 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI:
34041 nargs = 5;
34042 mask_pos = 1;
34043 nargs_constant = 1;
34044 break;
34046 default:
34047 gcc_unreachable ();
34050 gcc_assert (nargs <= ARRAY_SIZE (args));
34052 if (comparison != UNKNOWN)
34054 gcc_assert (nargs == 2);
34055 return ix86_expand_sse_compare (d, exp, target, swap);
34058 if (rmode == VOIDmode || rmode == tmode)
34060 if (optimize
34061 || target == 0
34062 || GET_MODE (target) != tmode
34063 || !insn_p->operand[0].predicate (target, tmode))
34064 target = gen_reg_rtx (tmode);
34065 real_target = target;
34067 else
34069 real_target = gen_reg_rtx (tmode);
34070 target = simplify_gen_subreg (rmode, real_target, tmode, 0);
34073 for (i = 0; i < nargs; i++)
34075 tree arg = CALL_EXPR_ARG (exp, i);
34076 rtx op = expand_normal (arg);
34077 enum machine_mode mode = insn_p->operand[i + 1].mode;
34078 bool match = insn_p->operand[i + 1].predicate (op, mode);
34080 if (last_arg_count && (i + 1) == nargs)
34082 /* SIMD shift insns take either an 8-bit immediate or a
34083 register as the count, but the builtin functions take an int.
34084 If the count doesn't match, we put it in a register.  */
34085 if (!match)
34087 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
34088 if (!insn_p->operand[i + 1].predicate (op, mode))
34089 op = copy_to_reg (op);
34092 else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
34093 (!mask_pos && (nargs - i) <= nargs_constant))
34095 if (!match)
34096 switch (icode)
34098 case CODE_FOR_avx2_inserti128:
34099 case CODE_FOR_avx2_extracti128:
34100 error ("the last argument must be a 1-bit immediate");
34101 return const0_rtx;
34103 case CODE_FOR_avx512f_cmpv8di3_mask:
34104 case CODE_FOR_avx512f_cmpv16si3_mask:
34105 case CODE_FOR_avx512f_ucmpv8di3_mask:
34106 case CODE_FOR_avx512f_ucmpv16si3_mask:
34107 error ("the last argument must be a 3-bit immediate");
34108 return const0_rtx;
34110 case CODE_FOR_sse4_1_roundsd:
34111 case CODE_FOR_sse4_1_roundss:
34113 case CODE_FOR_sse4_1_roundpd:
34114 case CODE_FOR_sse4_1_roundps:
34115 case CODE_FOR_avx_roundpd256:
34116 case CODE_FOR_avx_roundps256:
34118 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
34119 case CODE_FOR_sse4_1_roundps_sfix:
34120 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
34121 case CODE_FOR_avx_roundps_sfix256:
34123 case CODE_FOR_sse4_1_blendps:
34124 case CODE_FOR_avx_blendpd256:
34125 case CODE_FOR_avx_vpermilv4df:
34126 case CODE_FOR_avx512f_getmantv8df_mask:
34127 case CODE_FOR_avx512f_getmantv16sf_mask:
34128 error ("the last argument must be a 4-bit immediate");
34129 return const0_rtx;
34131 case CODE_FOR_sha1rnds4:
34132 case CODE_FOR_sse4_1_blendpd:
34133 case CODE_FOR_avx_vpermilv2df:
34134 case CODE_FOR_xop_vpermil2v2df3:
34135 case CODE_FOR_xop_vpermil2v4sf3:
34136 case CODE_FOR_xop_vpermil2v4df3:
34137 case CODE_FOR_xop_vpermil2v8sf3:
34138 case CODE_FOR_avx512f_vinsertf32x4_mask:
34139 case CODE_FOR_avx512f_vinserti32x4_mask:
34140 case CODE_FOR_avx512f_vextractf32x4_mask:
34141 case CODE_FOR_avx512f_vextracti32x4_mask:
34142 error ("the last argument must be a 2-bit immediate");
34143 return const0_rtx;
34145 case CODE_FOR_avx_vextractf128v4df:
34146 case CODE_FOR_avx_vextractf128v8sf:
34147 case CODE_FOR_avx_vextractf128v8si:
34148 case CODE_FOR_avx_vinsertf128v4df:
34149 case CODE_FOR_avx_vinsertf128v8sf:
34150 case CODE_FOR_avx_vinsertf128v8si:
34151 case CODE_FOR_avx512f_vinsertf64x4_mask:
34152 case CODE_FOR_avx512f_vinserti64x4_mask:
34153 case CODE_FOR_avx512f_vextractf64x4_mask:
34154 case CODE_FOR_avx512f_vextracti64x4_mask:
34155 error ("the last argument must be a 1-bit immediate");
34156 return const0_rtx;
34158 case CODE_FOR_avx_vmcmpv2df3:
34159 case CODE_FOR_avx_vmcmpv4sf3:
34160 case CODE_FOR_avx_cmpv2df3:
34161 case CODE_FOR_avx_cmpv4sf3:
34162 case CODE_FOR_avx_cmpv4df3:
34163 case CODE_FOR_avx_cmpv8sf3:
34164 case CODE_FOR_avx512f_cmpv8df3_mask:
34165 case CODE_FOR_avx512f_cmpv16sf3_mask:
34166 case CODE_FOR_avx512f_vmcmpv2df3_mask:
34167 case CODE_FOR_avx512f_vmcmpv4sf3_mask:
34168 error ("the last argument must be a 5-bit immediate");
34169 return const0_rtx;
34171 default:
34172 switch (nargs_constant)
34174 case 2:
34175 if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
34176 (!mask_pos && (nargs - i) == nargs_constant))
34178 error ("the next to last argument must be an 8-bit immediate");
34179 break;
34181 case 1:
34182 error ("the last argument must be an 8-bit immediate");
34183 break;
34184 default:
34185 gcc_unreachable ();
34187 return const0_rtx;
34190 else
34192 if (VECTOR_MODE_P (mode))
34193 op = safe_vector_operand (op, mode);
34195 /* If we aren't optimizing, only allow one memory operand to
34196 be generated. */
34197 if (memory_operand (op, mode))
34198 num_memory++;
34200 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34202 if (optimize || !match || num_memory > 1)
34203 op = copy_to_mode_reg (mode, op);
34205 else
34207 op = copy_to_reg (op);
34208 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
34212 args[i].op = op;
34213 args[i].mode = mode;
34216 switch (nargs)
34218 case 1:
34219 pat = GEN_FCN (icode) (real_target, args[0].op);
34220 break;
34221 case 2:
34222 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
34223 break;
34224 case 3:
34225 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34226 args[2].op);
34227 break;
34228 case 4:
34229 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34230 args[2].op, args[3].op);
34231 break;
34232 case 5:
34233 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34234 args[2].op, args[3].op, args[4].op);
break;
34235 case 6:
34236 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34237 args[2].op, args[3].op, args[4].op,
34238 args[5].op);
34239 break;
34240 default:
34241 gcc_unreachable ();
34244 if (! pat)
34245 return 0;
34247 emit_insn (pat);
34248 return target;
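/* As an illustration of the immediate diagnostics in the function above
   (a usage sketch only; the builtin spelling is assumed to be the
   customary one behind _mm256_insertf128_ps):
     __m256 r = __builtin_ia32_vinsertf128_ps256 (a, b, 2);
   maps to CODE_FOR_avx_vinsertf128v8sf, whose selector predicate only
   accepts 0 or 1, so the call is rejected with
   "the last argument must be a 1-bit immediate".  */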
34251 /* Transform a pattern of the following layout:
34252 (parallel [
34253 (set (A B))
34254 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)])
34256 into:
34257 (set (A B))
34260 or a longer (parallel [ A B ...
34262 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
34265 ... ]) into:
34266 (parallel [ A B ... ]), i.e. drop the embedded-rounding unspec.  */
34268 static rtx
34269 ix86_erase_embedded_rounding (rtx pat)
34271 if (GET_CODE (pat) == INSN)
34272 pat = PATTERN (pat);
34274 gcc_assert (GET_CODE (pat) == PARALLEL);
34276 if (XVECLEN (pat, 0) == 2)
34278 rtx p0 = XVECEXP (pat, 0, 0);
34279 rtx p1 = XVECEXP (pat, 0, 1);
34281 gcc_assert (GET_CODE (p0) == SET
34282 && GET_CODE (p1) == UNSPEC
34283 && XINT (p1, 1) == UNSPEC_EMBEDDED_ROUNDING);
34285 return p0;
34287 else
34289 rtx *res = XALLOCAVEC (rtx, XVECLEN (pat, 0));
34290 int i = 0;
34291 int j = 0;
34293 for (; i < XVECLEN (pat, 0); ++i)
34295 rtx elem = XVECEXP (pat, 0, i);
34296 if (GET_CODE (elem) != UNSPEC
34297 || XINT (elem, 1) != UNSPEC_EMBEDDED_ROUNDING)
34298 res [j++] = elem;
34301 /* At most one occurrence was removed.  */
34302 gcc_assert (j >= XVECLEN (pat, 0) - 1);
34304 return gen_rtx_PARALLEL (GET_MODE (pat), gen_rtvec_v (j, res));
34308 /* Subroutine of ix86_expand_round_builtin to take care of comi insns
34309 with rounding. */
34310 static rtx
34311 ix86_expand_sse_comi_round (const struct builtin_description *d,
34312 tree exp, rtx target)
34314 rtx pat, set_dst;
34315 tree arg0 = CALL_EXPR_ARG (exp, 0);
34316 tree arg1 = CALL_EXPR_ARG (exp, 1);
34317 tree arg2 = CALL_EXPR_ARG (exp, 2);
34318 tree arg3 = CALL_EXPR_ARG (exp, 3);
34319 rtx op0 = expand_normal (arg0);
34320 rtx op1 = expand_normal (arg1);
34321 rtx op2 = expand_normal (arg2);
34322 rtx op3 = expand_normal (arg3);
34323 enum insn_code icode = d->icode;
34324 const struct insn_data_d *insn_p = &insn_data[icode];
34325 enum machine_mode mode0 = insn_p->operand[0].mode;
34326 enum machine_mode mode1 = insn_p->operand[1].mode;
34327 enum rtx_code comparison = UNEQ;
34328 bool need_ucomi = false;
34330 /* See avxintrin.h for values. */
34331 enum rtx_code comi_comparisons[32] =
34333 UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
34334 UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
34335 UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
34337 bool need_ucomi_values[32] =
34339 true, false, false, true, true, false, false, true,
34340 true, false, false, true, true, false, false, true,
34341 false, true, true, false, false, true, true, false,
34342 false, true, true, false, false, true, true, false
34345 if (!CONST_INT_P (op2))
34347 error ("the third argument must be a comparison constant");
34348 return const0_rtx;
34350 if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
34352 error ("incorrect comparison mode");
34353 return const0_rtx;
34356 if (!insn_p->operand[2].predicate (op3, SImode))
34358 error ("incorrect rounding operand");
34359 return const0_rtx;
34362 comparison = comi_comparisons[INTVAL (op2)];
34363 need_ucomi = need_ucomi_values[INTVAL (op2)];
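/* For example, a predicate value of _CMP_EQ_OQ (0) selects the UNEQ
   comparison and the unordered ucomi form, while _CMP_GT_OS (0x0e)
   selects GT and keeps the signaling comi form; the index is simply
   the avxintrin.h predicate constant mentioned above.  */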
34365 if (VECTOR_MODE_P (mode0))
34366 op0 = safe_vector_operand (op0, mode0);
34367 if (VECTOR_MODE_P (mode1))
34368 op1 = safe_vector_operand (op1, mode1);
34370 target = gen_reg_rtx (SImode);
34371 emit_move_insn (target, const0_rtx);
34372 target = gen_rtx_SUBREG (QImode, target, 0);
34374 if ((optimize && !register_operand (op0, mode0))
34375 || !insn_p->operand[0].predicate (op0, mode0))
34376 op0 = copy_to_mode_reg (mode0, op0);
34377 if ((optimize && !register_operand (op1, mode1))
34378 || !insn_p->operand[1].predicate (op1, mode1))
34379 op1 = copy_to_mode_reg (mode1, op1);
34381 if (need_ucomi)
34382 icode = icode == CODE_FOR_sse_comi_round
34383 ? CODE_FOR_sse_ucomi_round
34384 : CODE_FOR_sse2_ucomi_round;
34386 pat = GEN_FCN (icode) (op0, op1, op3);
34387 if (! pat)
34388 return 0;
34390 /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point. */
34391 if (INTVAL (op3) == NO_ROUND)
34393 pat = ix86_erase_embedded_rounding (pat);
34394 if (! pat)
34395 return 0;
34397 set_dst = SET_DEST (pat);
34399 else
34401 gcc_assert (GET_CODE (XVECEXP (pat, 0, 0)) == SET);
34402 set_dst = SET_DEST (XVECEXP (pat, 0, 0));
34405 emit_insn (pat);
34406 emit_insn (gen_rtx_SET (VOIDmode,
34407 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
34408 gen_rtx_fmt_ee (comparison, QImode,
34409 set_dst,
34410 const0_rtx)));
34412 return SUBREG_REG (target);
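/* A typical way to reach the routine above (a sketch; the intrinsic
   spelling is assumed from avx512fintrin.h) is one of the comi-with-SAE
   intrinsics, e.g.
     int r = _mm_comi_round_ss (a, b, _CMP_GE_OS, _MM_FROUND_NO_EXC);
   which is dispatched here from the INT_FTYPE_V4SF_V4SF_INT_INT case
   of ix86_expand_round_builtin below.  */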
34415 static rtx
34416 ix86_expand_round_builtin (const struct builtin_description *d,
34417 tree exp, rtx target)
34419 rtx pat;
34420 unsigned int i, nargs;
34421 struct
34423 rtx op;
34424 enum machine_mode mode;
34425 } args[6];
34426 enum insn_code icode = d->icode;
34427 const struct insn_data_d *insn_p = &insn_data[icode];
34428 enum machine_mode tmode = insn_p->operand[0].mode;
34429 unsigned int nargs_constant = 0;
34430 unsigned int redundant_embed_rnd = 0;
34432 switch ((enum ix86_builtin_func_type) d->flag)
34434 case UINT64_FTYPE_V2DF_INT:
34435 case UINT64_FTYPE_V4SF_INT:
34436 case UINT_FTYPE_V2DF_INT:
34437 case UINT_FTYPE_V4SF_INT:
34438 case INT64_FTYPE_V2DF_INT:
34439 case INT64_FTYPE_V4SF_INT:
34440 case INT_FTYPE_V2DF_INT:
34441 case INT_FTYPE_V4SF_INT:
34442 nargs = 2;
34443 break;
34444 case V4SF_FTYPE_V4SF_UINT_INT:
34445 case V4SF_FTYPE_V4SF_UINT64_INT:
34446 case V2DF_FTYPE_V2DF_UINT64_INT:
34447 case V4SF_FTYPE_V4SF_INT_INT:
34448 case V4SF_FTYPE_V4SF_INT64_INT:
34449 case V2DF_FTYPE_V2DF_INT64_INT:
34450 case V4SF_FTYPE_V4SF_V4SF_INT:
34451 case V2DF_FTYPE_V2DF_V2DF_INT:
34452 case V4SF_FTYPE_V4SF_V2DF_INT:
34453 case V2DF_FTYPE_V2DF_V4SF_INT:
34454 nargs = 3;
34455 break;
34456 case V8SF_FTYPE_V8DF_V8SF_QI_INT:
34457 case V8DF_FTYPE_V8DF_V8DF_QI_INT:
34458 case V8SI_FTYPE_V8DF_V8SI_QI_INT:
34459 case V16SF_FTYPE_V16SF_V16SF_HI_INT:
34460 case V16SF_FTYPE_V16SI_V16SF_HI_INT:
34461 case V16SI_FTYPE_V16SF_V16SI_HI_INT:
34462 case V8DF_FTYPE_V8SF_V8DF_QI_INT:
34463 case V16SF_FTYPE_V16HI_V16SF_HI_INT:
34464 case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
34465 case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
34466 nargs = 4;
34467 break;
34468 case V4SF_FTYPE_V4SF_V4SF_INT_INT:
34469 case V2DF_FTYPE_V2DF_V2DF_INT_INT:
34470 nargs_constant = 2;
34471 nargs = 4;
34472 break;
34473 case INT_FTYPE_V4SF_V4SF_INT_INT:
34474 case INT_FTYPE_V2DF_V2DF_INT_INT:
34475 return ix86_expand_sse_comi_round (d, exp, target);
34476 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT:
34477 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
34478 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
34479 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
34480 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
34481 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
34482 nargs = 5;
34483 break;
34484 case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
34485 case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
34486 nargs_constant = 4;
34487 nargs = 5;
34488 break;
34489 case QI_FTYPE_V8DF_V8DF_INT_QI_INT:
34490 case QI_FTYPE_V2DF_V2DF_INT_QI_INT:
34491 case HI_FTYPE_V16SF_V16SF_INT_HI_INT:
34492 case QI_FTYPE_V4SF_V4SF_INT_QI_INT:
34493 nargs_constant = 3;
34494 nargs = 5;
34495 break;
34496 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
34497 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
34498 nargs = 6;
34499 nargs_constant = 4;
34500 break;
34501 case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT:
34502 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT:
34503 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT:
34504 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT:
34505 nargs = 6;
34506 nargs_constant = 3;
34507 break;
34508 default:
34509 gcc_unreachable ();
34511 gcc_assert (nargs <= ARRAY_SIZE (args));
34513 if (optimize
34514 || target == 0
34515 || GET_MODE (target) != tmode
34516 || !insn_p->operand[0].predicate (target, tmode))
34517 target = gen_reg_rtx (tmode);
34519 for (i = 0; i < nargs; i++)
34521 tree arg = CALL_EXPR_ARG (exp, i);
34522 rtx op = expand_normal (arg);
34523 enum machine_mode mode = insn_p->operand[i + 1].mode;
34524 bool match = insn_p->operand[i + 1].predicate (op, mode);
34526 if (i == nargs - nargs_constant)
34528 if (!match)
34530 switch (icode)
34532 case CODE_FOR_avx512f_getmantv8df_mask_round:
34533 case CODE_FOR_avx512f_getmantv16sf_mask_round:
34534 case CODE_FOR_avx512f_getmantv2df_round:
34535 case CODE_FOR_avx512f_getmantv4sf_round:
34536 error ("the immediate argument must be a 4-bit immediate");
34537 return const0_rtx;
34538 case CODE_FOR_avx512f_cmpv8df3_mask_round:
34539 case CODE_FOR_avx512f_cmpv16sf3_mask_round:
34540 case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
34541 case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
34542 error ("the immediate argument must be a 5-bit immediate");
34543 return const0_rtx;
34544 default:
34545 error ("the immediate argument must be an 8-bit immediate");
34546 return const0_rtx;
34550 else if (i == nargs-1)
34552 if (!insn_p->operand[nargs].predicate (op, SImode))
34554 error ("incorrect rounding operand");
34555 return const0_rtx;
34558 /* If there is no rounding, use the normal version of the pattern.  */
34559 if (INTVAL (op) == NO_ROUND)
34560 redundant_embed_rnd = 1;
34562 else
34564 if (VECTOR_MODE_P (mode))
34565 op = safe_vector_operand (op, mode);
34567 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34569 if (optimize || !match)
34570 op = copy_to_mode_reg (mode, op);
34572 else
34574 op = copy_to_reg (op);
34575 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
34579 args[i].op = op;
34580 args[i].mode = mode;
34583 switch (nargs)
34585 case 1:
34586 pat = GEN_FCN (icode) (target, args[0].op);
34587 break;
34588 case 2:
34589 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
34590 break;
34591 case 3:
34592 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34593 args[2].op);
34594 break;
34595 case 4:
34596 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34597 args[2].op, args[3].op);
34598 break;
34599 case 5:
34600 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34601 args[2].op, args[3].op, args[4].op);
break;
34602 case 6:
34603 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34604 args[2].op, args[3].op, args[4].op,
34605 args[5].op);
34606 break;
34607 default:
34608 gcc_unreachable ();
34611 if (!pat)
34612 return 0;
34614 if (redundant_embed_rnd)
34615 pat = ix86_erase_embedded_rounding (pat);
34617 emit_insn (pat);
34618 return target;
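/* Sketch of the embedded-rounding handling above (intrinsic names are
   the customary avx512fintrin.h ones, shown only as an illustration):
     __m512 r = _mm512_add_round_ps (x, y, _MM_FROUND_TO_NEAREST_INT
                                           | _MM_FROUND_NO_EXC);
   keeps the rounding unspec in the generated pattern, whereas passing
   _MM_FROUND_CUR_DIRECTION is expected to match NO_ROUND here, so
   redundant_embed_rnd is set and ix86_erase_embedded_rounding strips
   the unspec again.  */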
34621 /* Subroutine of ix86_expand_builtin to take care of special insns
34622 with variable number of operands. */
34624 static rtx
34625 ix86_expand_special_args_builtin (const struct builtin_description *d,
34626 tree exp, rtx target)
34628 tree arg;
34629 rtx pat, op;
34630 unsigned int i, nargs, arg_adjust, memory;
34631 bool aligned_mem = false;
34632 struct
34634 rtx op;
34635 enum machine_mode mode;
34636 } args[3];
34637 enum insn_code icode = d->icode;
34638 bool last_arg_constant = false;
34639 const struct insn_data_d *insn_p = &insn_data[icode];
34640 enum machine_mode tmode = insn_p->operand[0].mode;
34641 enum { load, store } klass;
34643 switch ((enum ix86_builtin_func_type) d->flag)
34645 case VOID_FTYPE_VOID:
34646 emit_insn (GEN_FCN (icode) (target));
34647 return 0;
34648 case VOID_FTYPE_UINT64:
34649 case VOID_FTYPE_UNSIGNED:
34650 nargs = 0;
34651 klass = store;
34652 memory = 0;
34653 break;
34655 case INT_FTYPE_VOID:
34656 case USHORT_FTYPE_VOID:
34657 case UINT64_FTYPE_VOID:
34658 case UNSIGNED_FTYPE_VOID:
34659 nargs = 0;
34660 klass = load;
34661 memory = 0;
34662 break;
34663 case UINT64_FTYPE_PUNSIGNED:
34664 case V2DI_FTYPE_PV2DI:
34665 case V4DI_FTYPE_PV4DI:
34666 case V32QI_FTYPE_PCCHAR:
34667 case V16QI_FTYPE_PCCHAR:
34668 case V8SF_FTYPE_PCV4SF:
34669 case V8SF_FTYPE_PCFLOAT:
34670 case V4SF_FTYPE_PCFLOAT:
34671 case V4DF_FTYPE_PCV2DF:
34672 case V4DF_FTYPE_PCDOUBLE:
34673 case V2DF_FTYPE_PCDOUBLE:
34674 case VOID_FTYPE_PVOID:
34675 case V16SI_FTYPE_PV4SI:
34676 case V16SF_FTYPE_PV4SF:
34677 case V8DI_FTYPE_PV4DI:
34678 case V8DI_FTYPE_PV8DI:
34679 case V8DF_FTYPE_PV4DF:
34680 nargs = 1;
34681 klass = load;
34682 memory = 0;
34683 switch (icode)
34685 case CODE_FOR_sse4_1_movntdqa:
34686 case CODE_FOR_avx2_movntdqa:
34687 case CODE_FOR_avx512f_movntdqa:
34688 aligned_mem = true;
34689 break;
34690 default:
34691 break;
34693 break;
34694 case VOID_FTYPE_PV2SF_V4SF:
34695 case VOID_FTYPE_PV8DI_V8DI:
34696 case VOID_FTYPE_PV4DI_V4DI:
34697 case VOID_FTYPE_PV2DI_V2DI:
34698 case VOID_FTYPE_PCHAR_V32QI:
34699 case VOID_FTYPE_PCHAR_V16QI:
34700 case VOID_FTYPE_PFLOAT_V16SF:
34701 case VOID_FTYPE_PFLOAT_V8SF:
34702 case VOID_FTYPE_PFLOAT_V4SF:
34703 case VOID_FTYPE_PDOUBLE_V8DF:
34704 case VOID_FTYPE_PDOUBLE_V4DF:
34705 case VOID_FTYPE_PDOUBLE_V2DF:
34706 case VOID_FTYPE_PLONGLONG_LONGLONG:
34707 case VOID_FTYPE_PULONGLONG_ULONGLONG:
34708 case VOID_FTYPE_PINT_INT:
34709 nargs = 1;
34710 klass = store;
34711 /* Reserve memory operand for target. */
34712 memory = ARRAY_SIZE (args);
34713 switch (icode)
34715 /* These builtins and instructions require the memory
34716 to be properly aligned. */
34717 case CODE_FOR_avx_movntv4di:
34718 case CODE_FOR_sse2_movntv2di:
34719 case CODE_FOR_avx_movntv8sf:
34720 case CODE_FOR_sse_movntv4sf:
34721 case CODE_FOR_sse4a_vmmovntv4sf:
34722 case CODE_FOR_avx_movntv4df:
34723 case CODE_FOR_sse2_movntv2df:
34724 case CODE_FOR_sse4a_vmmovntv2df:
34725 case CODE_FOR_sse2_movntidi:
34726 case CODE_FOR_sse_movntq:
34727 case CODE_FOR_sse2_movntisi:
34728 case CODE_FOR_avx512f_movntv16sf:
34729 case CODE_FOR_avx512f_movntv8df:
34730 case CODE_FOR_avx512f_movntv8di:
34731 aligned_mem = true;
34732 break;
34733 default:
34734 break;
34736 break;
34737 case V4SF_FTYPE_V4SF_PCV2SF:
34738 case V2DF_FTYPE_V2DF_PCDOUBLE:
34739 nargs = 2;
34740 klass = load;
34741 memory = 1;
34742 break;
34743 case V8SF_FTYPE_PCV8SF_V8SI:
34744 case V4DF_FTYPE_PCV4DF_V4DI:
34745 case V4SF_FTYPE_PCV4SF_V4SI:
34746 case V2DF_FTYPE_PCV2DF_V2DI:
34747 case V8SI_FTYPE_PCV8SI_V8SI:
34748 case V4DI_FTYPE_PCV4DI_V4DI:
34749 case V4SI_FTYPE_PCV4SI_V4SI:
34750 case V2DI_FTYPE_PCV2DI_V2DI:
34751 nargs = 2;
34752 klass = load;
34753 memory = 0;
34754 break;
34755 case VOID_FTYPE_PV8DF_V8DF_QI:
34756 case VOID_FTYPE_PV16SF_V16SF_HI:
34757 case VOID_FTYPE_PV8DI_V8DI_QI:
34758 case VOID_FTYPE_PV16SI_V16SI_HI:
34759 switch (icode)
34761 /* These builtins and instructions require the memory
34762 to be properly aligned. */
34763 case CODE_FOR_avx512f_storev16sf_mask:
34764 case CODE_FOR_avx512f_storev16si_mask:
34765 case CODE_FOR_avx512f_storev8df_mask:
34766 case CODE_FOR_avx512f_storev8di_mask:
34767 case CODE_FOR_avx512vl_storev8sf_mask:
34768 case CODE_FOR_avx512vl_storev8si_mask:
34769 case CODE_FOR_avx512vl_storev4df_mask:
34770 case CODE_FOR_avx512vl_storev4di_mask:
34771 case CODE_FOR_avx512vl_storev4sf_mask:
34772 case CODE_FOR_avx512vl_storev4si_mask:
34773 case CODE_FOR_avx512vl_storev2df_mask:
34774 case CODE_FOR_avx512vl_storev2di_mask:
34775 aligned_mem = true;
34776 break;
34777 default:
34778 break;
34780 /* FALLTHRU */
34781 case VOID_FTYPE_PV8SF_V8SI_V8SF:
34782 case VOID_FTYPE_PV4DF_V4DI_V4DF:
34783 case VOID_FTYPE_PV4SF_V4SI_V4SF:
34784 case VOID_FTYPE_PV2DF_V2DI_V2DF:
34785 case VOID_FTYPE_PV8SI_V8SI_V8SI:
34786 case VOID_FTYPE_PV4DI_V4DI_V4DI:
34787 case VOID_FTYPE_PV4SI_V4SI_V4SI:
34788 case VOID_FTYPE_PV2DI_V2DI_V2DI:
34789 case VOID_FTYPE_PDOUBLE_V2DF_QI:
34790 case VOID_FTYPE_PFLOAT_V4SF_QI:
34791 case VOID_FTYPE_PV8SI_V8DI_QI:
34792 case VOID_FTYPE_PV8HI_V8DI_QI:
34793 case VOID_FTYPE_PV16HI_V16SI_HI:
34794 case VOID_FTYPE_PV16QI_V8DI_QI:
34795 case VOID_FTYPE_PV16QI_V16SI_HI:
34796 nargs = 2;
34797 klass = store;
34798 /* Reserve memory operand for target. */
34799 memory = ARRAY_SIZE (args);
34800 break;
34801 case V16SF_FTYPE_PCV16SF_V16SF_HI:
34802 case V16SI_FTYPE_PCV16SI_V16SI_HI:
34803 case V8DF_FTYPE_PCV8DF_V8DF_QI:
34804 case V8DI_FTYPE_PCV8DI_V8DI_QI:
34805 case V2DF_FTYPE_PCDOUBLE_V2DF_QI:
34806 case V4SF_FTYPE_PCFLOAT_V4SF_QI:
34807 nargs = 3;
34808 klass = load;
34809 memory = 0;
34810 switch (icode)
34812 /* These builtins and instructions require the memory
34813 to be properly aligned. */
34814 case CODE_FOR_avx512f_loadv16sf_mask:
34815 case CODE_FOR_avx512f_loadv16si_mask:
34816 case CODE_FOR_avx512f_loadv8df_mask:
34817 case CODE_FOR_avx512f_loadv8di_mask:
34818 case CODE_FOR_avx512vl_loadv8sf_mask:
34819 case CODE_FOR_avx512vl_loadv8si_mask:
34820 case CODE_FOR_avx512vl_loadv4df_mask:
34821 case CODE_FOR_avx512vl_loadv4di_mask:
34822 case CODE_FOR_avx512vl_loadv4sf_mask:
34823 case CODE_FOR_avx512vl_loadv4si_mask:
34824 case CODE_FOR_avx512vl_loadv2df_mask:
34825 case CODE_FOR_avx512vl_loadv2di_mask:
34826 case CODE_FOR_avx512bw_loadv64qi_mask:
34827 case CODE_FOR_avx512vl_loadv32qi_mask:
34828 case CODE_FOR_avx512vl_loadv16qi_mask:
34829 case CODE_FOR_avx512bw_loadv32hi_mask:
34830 case CODE_FOR_avx512vl_loadv16hi_mask:
34831 case CODE_FOR_avx512vl_loadv8hi_mask:
34832 aligned_mem = true;
34833 break;
34834 default:
34835 break;
34837 break;
34838 case VOID_FTYPE_UINT_UINT_UINT:
34839 case VOID_FTYPE_UINT64_UINT_UINT:
34840 case UCHAR_FTYPE_UINT_UINT_UINT:
34841 case UCHAR_FTYPE_UINT64_UINT_UINT:
34842 nargs = 3;
34843 klass = load;
34844 memory = ARRAY_SIZE (args);
34845 last_arg_constant = true;
34846 break;
34847 default:
34848 gcc_unreachable ();
34851 gcc_assert (nargs <= ARRAY_SIZE (args));
34853 if (klass == store)
34855 arg = CALL_EXPR_ARG (exp, 0);
34856 op = expand_normal (arg);
34857 gcc_assert (target == 0);
34858 if (memory)
34860 op = ix86_zero_extend_to_Pmode (op);
34861 target = gen_rtx_MEM (tmode, op);
34862 /* target at this point has just BITS_PER_UNIT MEM_ALIGN
34863 on it. Try to improve it using get_pointer_alignment,
34864 and if the special builtin is one that requires strict
34865 mode alignment, also from its GET_MODE_ALIGNMENT.
34866 Failure to do so could lead to ix86_legitimate_combined_insn
34867 rejecting all changes to such insns. */
34868 unsigned int align = get_pointer_alignment (arg);
34869 if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
34870 align = GET_MODE_ALIGNMENT (tmode);
34871 if (MEM_ALIGN (target) < align)
34872 set_mem_align (target, align);
34874 else
34875 target = force_reg (tmode, op);
34876 arg_adjust = 1;
34878 else
34880 arg_adjust = 0;
34881 if (optimize
34882 || target == 0
34883 || !register_operand (target, tmode)
34884 || GET_MODE (target) != tmode)
34885 target = gen_reg_rtx (tmode);
34888 for (i = 0; i < nargs; i++)
34890 enum machine_mode mode = insn_p->operand[i + 1].mode;
34891 bool match;
34893 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
34894 op = expand_normal (arg);
34895 match = insn_p->operand[i + 1].predicate (op, mode);
34897 if (last_arg_constant && (i + 1) == nargs)
34899 if (!match)
34901 if (icode == CODE_FOR_lwp_lwpvalsi3
34902 || icode == CODE_FOR_lwp_lwpinssi3
34903 || icode == CODE_FOR_lwp_lwpvaldi3
34904 || icode == CODE_FOR_lwp_lwpinsdi3)
34905 error ("the last argument must be a 32-bit immediate");
34906 else
34907 error ("the last argument must be an 8-bit immediate");
34908 return const0_rtx;
34911 else
34913 if (i == memory)
34915 /* This must be the memory operand. */
34916 op = ix86_zero_extend_to_Pmode (op);
34917 op = gen_rtx_MEM (mode, op);
34918 /* op at this point has just BITS_PER_UNIT MEM_ALIGN
34919 on it. Try to improve it using get_pointer_alignment,
34920 and if the special builtin is one that requires strict
34921 mode alignment, also from its GET_MODE_ALIGNMENT.
34922 Failure to do so could lead to ix86_legitimate_combined_insn
34923 rejecting all changes to such insns. */
34924 unsigned int align = get_pointer_alignment (arg);
34925 if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
34926 align = GET_MODE_ALIGNMENT (mode);
34927 if (MEM_ALIGN (op) < align)
34928 set_mem_align (op, align);
34930 else
34932 /* This must be a register.  */
34933 if (VECTOR_MODE_P (mode))
34934 op = safe_vector_operand (op, mode);
34936 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34937 op = copy_to_mode_reg (mode, op);
34938 else
34940 op = copy_to_reg (op);
34941 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
34946 args[i].op = op;
34947 args[i].mode = mode;
34950 switch (nargs)
34952 case 0:
34953 pat = GEN_FCN (icode) (target);
34954 break;
34955 case 1:
34956 pat = GEN_FCN (icode) (target, args[0].op);
34957 break;
34958 case 2:
34959 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
34960 break;
34961 case 3:
34962 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
34963 break;
34964 default:
34965 gcc_unreachable ();
34968 if (! pat)
34969 return 0;
34970 emit_insn (pat);
34971 return klass == store ? 0 : target;
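/* Example of the store path above: the builtin behind _mm_stream_ps
   (assuming the usual mapping) has type VOID_FTYPE_PFLOAT_V4SF, so
   klass is store, the pointer argument becomes the MEM target, and
   since CODE_FOR_sse_movntv4sf is in the aligned_mem list the MEM is
   given GET_MODE_ALIGNMENT (V4SFmode) so the strict-alignment movnt
   pattern is not rejected later.  */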
34974 /* Return the integer constant in ARG. Constrain it to be in the range
34975 of the subparts of VEC_TYPE; issue an error if not. */
34977 static int
34978 get_element_number (tree vec_type, tree arg)
34980 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
34982 if (!tree_fits_uhwi_p (arg)
34983 || (elt = tree_to_uhwi (arg), elt > max))
34985 error ("selector must be an integer constant in the range 0..%wi", max);
34986 return 0;
34989 return elt;
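/* E.g. for a V4SF vector type TYPE_VECTOR_SUBPARTS is 4, so max is 3;
   a selector of 5 triggers the error above and 0 is returned as a safe
   fallback.  */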
34992 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
34993 ix86_expand_vector_init. We DO have language-level syntax for this, in
34994 the form of (type){ init-list }. Except that since we can't place emms
34995 instructions from inside the compiler, we can't allow the use of MMX
34996 registers unless the user explicitly asks for it. So we do *not* define
34997 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
34998 we have builtins invoked by mmintrin.h that give us license to emit
34999 these sorts of instructions. */
35001 static rtx
35002 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
35004 enum machine_mode tmode = TYPE_MODE (type);
35005 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
35006 int i, n_elt = GET_MODE_NUNITS (tmode);
35007 rtvec v = rtvec_alloc (n_elt);
35009 gcc_assert (VECTOR_MODE_P (tmode));
35010 gcc_assert (call_expr_nargs (exp) == n_elt);
35012 for (i = 0; i < n_elt; ++i)
35014 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
35015 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
35018 if (!target || !register_operand (target, tmode))
35019 target = gen_reg_rtx (tmode);
35021 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
35022 return target;
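/* Sketch of a call that lands here (builtin mapping assumed to be the
   usual mmintrin.h one): _mm_set_pi32 expands to a call of
   __builtin_ia32_vec_init_v2si with its two scalar elements, and the
   loop above just collects those arguments into a PARALLEL for
   ix86_expand_vector_init to turn into V2SImode RTL.  */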
35025 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
35026 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
35027 had a language-level syntax for referencing vector elements. */
35029 static rtx
35030 ix86_expand_vec_ext_builtin (tree exp, rtx target)
35032 enum machine_mode tmode, mode0;
35033 tree arg0, arg1;
35034 int elt;
35035 rtx op0;
35037 arg0 = CALL_EXPR_ARG (exp, 0);
35038 arg1 = CALL_EXPR_ARG (exp, 1);
35040 op0 = expand_normal (arg0);
35041 elt = get_element_number (TREE_TYPE (arg0), arg1);
35043 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
35044 mode0 = TYPE_MODE (TREE_TYPE (arg0));
35045 gcc_assert (VECTOR_MODE_P (mode0));
35047 op0 = force_reg (mode0, op0);
35049 if (optimize || !target || !register_operand (target, tmode))
35050 target = gen_reg_rtx (tmode);
35052 ix86_expand_vector_extract (true, target, op0, elt);
35054 return target;
35057 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
35058 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
35059 a language-level syntax for referencing vector elements. */
35061 static rtx
35062 ix86_expand_vec_set_builtin (tree exp)
35064 enum machine_mode tmode, mode1;
35065 tree arg0, arg1, arg2;
35066 int elt;
35067 rtx op0, op1, target;
35069 arg0 = CALL_EXPR_ARG (exp, 0);
35070 arg1 = CALL_EXPR_ARG (exp, 1);
35071 arg2 = CALL_EXPR_ARG (exp, 2);
35073 tmode = TYPE_MODE (TREE_TYPE (arg0));
35074 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
35075 gcc_assert (VECTOR_MODE_P (tmode));
35077 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
35078 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
35079 elt = get_element_number (TREE_TYPE (arg0), arg2);
35081 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
35082 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
35084 op0 = force_reg (tmode, op0);
35085 op1 = force_reg (mode1, op1);
35087 /* OP0 is the source of these builtin functions and shouldn't be
35088 modified. Create a copy, use it and return it as target. */
35089 target = gen_reg_rtx (tmode);
35090 emit_move_insn (target, op0);
35091 ix86_expand_vector_set (true, target, op1, elt);
35093 return target;
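/* Because op0 is copied into a fresh register before
   ix86_expand_vector_set is called, a wrapper such as _mm_insert_epi16
   (which goes through IX86_BUILTIN_VEC_SET_V8HI, assuming the usual
   mapping) yields a new vector value and never modifies its first
   argument in place.  */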
35096 /* Expand an expression EXP that calls a built-in function,
35097 with result going to TARGET if that's convenient
35098 (and in mode MODE if that's convenient).
35099 SUBTARGET may be used as the target for computing one of EXP's operands.
35100 IGNORE is nonzero if the value is to be ignored. */
35102 static rtx
35103 ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
35104 enum machine_mode mode, int ignore)
35106 const struct builtin_description *d;
35107 size_t i;
35108 enum insn_code icode;
35109 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
35110 tree arg0, arg1, arg2, arg3, arg4;
35111 rtx op0, op1, op2, op3, op4, pat, insn;
35112 enum machine_mode mode0, mode1, mode2, mode3, mode4;
35113 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
35115 /* For CPU builtins that can be folded, fold first and expand the fold. */
35116 switch (fcode)
35118 case IX86_BUILTIN_CPU_INIT:
35120 /* Make it call __cpu_indicator_init in libgcc. */
35121 tree call_expr, fndecl, type;
35122 type = build_function_type_list (integer_type_node, NULL_TREE);
35123 fndecl = build_fn_decl ("__cpu_indicator_init", type);
35124 call_expr = build_call_expr (fndecl, 0);
35125 return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
35127 case IX86_BUILTIN_CPU_IS:
35128 case IX86_BUILTIN_CPU_SUPPORTS:
35130 tree arg0 = CALL_EXPR_ARG (exp, 0);
35131 tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
35132 gcc_assert (fold_expr != NULL_TREE);
35133 return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
35137 /* Determine whether the builtin function is available under the current ISA.
35138 Originally the builtin was not created if it wasn't applicable to the
35139 current ISA based on the command line switches. With function specific
35140 options, we need to check in the context of the function making the call
35141 whether it is supported. */
35142 if (ix86_builtins_isa[fcode].isa
35143 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
35145 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
35146 NULL, (enum fpmath_unit) 0, false);
35148 if (!opts)
35149 error ("%qE needs unknown isa option", fndecl);
35150 else
35152 gcc_assert (opts != NULL);
35153 error ("%qE needs isa option %s", fndecl, opts);
35154 free (opts);
35156 return const0_rtx;
35159 switch (fcode)
35161 case IX86_BUILTIN_MASKMOVQ:
35162 case IX86_BUILTIN_MASKMOVDQU:
35163 icode = (fcode == IX86_BUILTIN_MASKMOVQ
35164 ? CODE_FOR_mmx_maskmovq
35165 : CODE_FOR_sse2_maskmovdqu);
35166 /* Note the arg order is different from the operand order. */
35167 arg1 = CALL_EXPR_ARG (exp, 0);
35168 arg2 = CALL_EXPR_ARG (exp, 1);
35169 arg0 = CALL_EXPR_ARG (exp, 2);
35170 op0 = expand_normal (arg0);
35171 op1 = expand_normal (arg1);
35172 op2 = expand_normal (arg2);
35173 mode0 = insn_data[icode].operand[0].mode;
35174 mode1 = insn_data[icode].operand[1].mode;
35175 mode2 = insn_data[icode].operand[2].mode;
35177 op0 = ix86_zero_extend_to_Pmode (op0);
35178 op0 = gen_rtx_MEM (mode1, op0);
35180 if (!insn_data[icode].operand[0].predicate (op0, mode0))
35181 op0 = copy_to_mode_reg (mode0, op0);
35182 if (!insn_data[icode].operand[1].predicate (op1, mode1))
35183 op1 = copy_to_mode_reg (mode1, op1);
35184 if (!insn_data[icode].operand[2].predicate (op2, mode2))
35185 op2 = copy_to_mode_reg (mode2, op2);
35186 pat = GEN_FCN (icode) (op0, op1, op2);
35187 if (! pat)
35188 return 0;
35189 emit_insn (pat);
35190 return 0;
35192 case IX86_BUILTIN_LDMXCSR:
35193 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
35194 target = assign_386_stack_local (SImode, SLOT_TEMP);
35195 emit_move_insn (target, op0);
35196 emit_insn (gen_sse_ldmxcsr (target));
35197 return 0;
35199 case IX86_BUILTIN_STMXCSR:
35200 target = assign_386_stack_local (SImode, SLOT_TEMP);
35201 emit_insn (gen_sse_stmxcsr (target));
35202 return copy_to_mode_reg (SImode, target);
35204 case IX86_BUILTIN_CLFLUSH:
35205 arg0 = CALL_EXPR_ARG (exp, 0);
35206 op0 = expand_normal (arg0);
35207 icode = CODE_FOR_sse2_clflush;
35208 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35209 op0 = ix86_zero_extend_to_Pmode (op0);
35211 emit_insn (gen_sse2_clflush (op0));
35212 return 0;
35214 case IX86_BUILTIN_CLFLUSHOPT:
35215 arg0 = CALL_EXPR_ARG (exp, 0);
35216 op0 = expand_normal (arg0);
35217 icode = CODE_FOR_clflushopt;
35218 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35219 op0 = ix86_zero_extend_to_Pmode (op0);
35221 emit_insn (gen_clflushopt (op0));
35222 return 0;
35224 case IX86_BUILTIN_MONITOR:
35225 arg0 = CALL_EXPR_ARG (exp, 0);
35226 arg1 = CALL_EXPR_ARG (exp, 1);
35227 arg2 = CALL_EXPR_ARG (exp, 2);
35228 op0 = expand_normal (arg0);
35229 op1 = expand_normal (arg1);
35230 op2 = expand_normal (arg2);
35231 if (!REG_P (op0))
35232 op0 = ix86_zero_extend_to_Pmode (op0);
35233 if (!REG_P (op1))
35234 op1 = copy_to_mode_reg (SImode, op1);
35235 if (!REG_P (op2))
35236 op2 = copy_to_mode_reg (SImode, op2);
35237 emit_insn (ix86_gen_monitor (op0, op1, op2));
35238 return 0;
35240 case IX86_BUILTIN_MWAIT:
35241 arg0 = CALL_EXPR_ARG (exp, 0);
35242 arg1 = CALL_EXPR_ARG (exp, 1);
35243 op0 = expand_normal (arg0);
35244 op1 = expand_normal (arg1);
35245 if (!REG_P (op0))
35246 op0 = copy_to_mode_reg (SImode, op0);
35247 if (!REG_P (op1))
35248 op1 = copy_to_mode_reg (SImode, op1);
35249 emit_insn (gen_sse3_mwait (op0, op1));
35250 return 0;
35252 case IX86_BUILTIN_VEC_INIT_V2SI:
35253 case IX86_BUILTIN_VEC_INIT_V4HI:
35254 case IX86_BUILTIN_VEC_INIT_V8QI:
35255 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
35257 case IX86_BUILTIN_VEC_EXT_V2DF:
35258 case IX86_BUILTIN_VEC_EXT_V2DI:
35259 case IX86_BUILTIN_VEC_EXT_V4SF:
35260 case IX86_BUILTIN_VEC_EXT_V4SI:
35261 case IX86_BUILTIN_VEC_EXT_V8HI:
35262 case IX86_BUILTIN_VEC_EXT_V2SI:
35263 case IX86_BUILTIN_VEC_EXT_V4HI:
35264 case IX86_BUILTIN_VEC_EXT_V16QI:
35265 return ix86_expand_vec_ext_builtin (exp, target);
35267 case IX86_BUILTIN_VEC_SET_V2DI:
35268 case IX86_BUILTIN_VEC_SET_V4SF:
35269 case IX86_BUILTIN_VEC_SET_V4SI:
35270 case IX86_BUILTIN_VEC_SET_V8HI:
35271 case IX86_BUILTIN_VEC_SET_V4HI:
35272 case IX86_BUILTIN_VEC_SET_V16QI:
35273 return ix86_expand_vec_set_builtin (exp);
35275 case IX86_BUILTIN_INFQ:
35276 case IX86_BUILTIN_HUGE_VALQ:
35278 REAL_VALUE_TYPE inf;
35279 rtx tmp;
35281 real_inf (&inf);
35282 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
35284 tmp = validize_mem (force_const_mem (mode, tmp));
35286 if (target == 0)
35287 target = gen_reg_rtx (mode);
35289 emit_move_insn (target, tmp);
35290 return target;
35293 case IX86_BUILTIN_RDPMC:
35294 case IX86_BUILTIN_RDTSC:
35295 case IX86_BUILTIN_RDTSCP:
35297 op0 = gen_reg_rtx (DImode);
35298 op1 = gen_reg_rtx (DImode);
35300 if (fcode == IX86_BUILTIN_RDPMC)
35302 arg0 = CALL_EXPR_ARG (exp, 0);
35303 op2 = expand_normal (arg0);
35304 if (!register_operand (op2, SImode))
35305 op2 = copy_to_mode_reg (SImode, op2);
35307 insn = (TARGET_64BIT
35308 ? gen_rdpmc_rex64 (op0, op1, op2)
35309 : gen_rdpmc (op0, op2));
35310 emit_insn (insn);
35312 else if (fcode == IX86_BUILTIN_RDTSC)
35314 insn = (TARGET_64BIT
35315 ? gen_rdtsc_rex64 (op0, op1)
35316 : gen_rdtsc (op0));
35317 emit_insn (insn);
35319 else
35321 op2 = gen_reg_rtx (SImode);
35323 insn = (TARGET_64BIT
35324 ? gen_rdtscp_rex64 (op0, op1, op2)
35325 : gen_rdtscp (op0, op2));
35326 emit_insn (insn);
35328 arg0 = CALL_EXPR_ARG (exp, 0);
35329 op4 = expand_normal (arg0);
35330 if (!address_operand (op4, VOIDmode))
35332 op4 = convert_memory_address (Pmode, op4);
35333 op4 = copy_addr_to_reg (op4);
35335 emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
35338 if (target == 0)
35340 /* mode is VOIDmode if __builtin_rd* has been called
35341 without lhs. */
35342 if (mode == VOIDmode)
35343 return target;
35344 target = gen_reg_rtx (mode);
35347 if (TARGET_64BIT)
35349 op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
35350 op1, 1, OPTAB_DIRECT);
35351 op0 = expand_simple_binop (DImode, IOR, op0, op1,
35352 op0, 1, OPTAB_DIRECT);
35355 emit_move_insn (target, op0);
35356 return target;
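/* In the TARGET_64BIT path above the two DImode temporaries hold the
   low and high 32-bit halves of the counter, so the result is assembled
   as op0 | (op1 << 32), i.e. the usual EDX:EAX recombination for
   rdtsc/rdtscp/rdpmc.  */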
35358 case IX86_BUILTIN_FXSAVE:
35359 case IX86_BUILTIN_FXRSTOR:
35360 case IX86_BUILTIN_FXSAVE64:
35361 case IX86_BUILTIN_FXRSTOR64:
35362 case IX86_BUILTIN_FNSTENV:
35363 case IX86_BUILTIN_FLDENV:
35364 mode0 = BLKmode;
35365 switch (fcode)
35367 case IX86_BUILTIN_FXSAVE:
35368 icode = CODE_FOR_fxsave;
35369 break;
35370 case IX86_BUILTIN_FXRSTOR:
35371 icode = CODE_FOR_fxrstor;
35372 break;
35373 case IX86_BUILTIN_FXSAVE64:
35374 icode = CODE_FOR_fxsave64;
35375 break;
35376 case IX86_BUILTIN_FXRSTOR64:
35377 icode = CODE_FOR_fxrstor64;
35378 break;
35379 case IX86_BUILTIN_FNSTENV:
35380 icode = CODE_FOR_fnstenv;
35381 break;
35382 case IX86_BUILTIN_FLDENV:
35383 icode = CODE_FOR_fldenv;
35384 break;
35385 default:
35386 gcc_unreachable ();
35389 arg0 = CALL_EXPR_ARG (exp, 0);
35390 op0 = expand_normal (arg0);
35392 if (!address_operand (op0, VOIDmode))
35394 op0 = convert_memory_address (Pmode, op0);
35395 op0 = copy_addr_to_reg (op0);
35397 op0 = gen_rtx_MEM (mode0, op0);
35399 pat = GEN_FCN (icode) (op0);
35400 if (pat)
35401 emit_insn (pat);
35402 return 0;
35404 case IX86_BUILTIN_XSAVE:
35405 case IX86_BUILTIN_XRSTOR:
35406 case IX86_BUILTIN_XSAVE64:
35407 case IX86_BUILTIN_XRSTOR64:
35408 case IX86_BUILTIN_XSAVEOPT:
35409 case IX86_BUILTIN_XSAVEOPT64:
35410 case IX86_BUILTIN_XSAVES:
35411 case IX86_BUILTIN_XRSTORS:
35412 case IX86_BUILTIN_XSAVES64:
35413 case IX86_BUILTIN_XRSTORS64:
35414 case IX86_BUILTIN_XSAVEC:
35415 case IX86_BUILTIN_XSAVEC64:
35416 arg0 = CALL_EXPR_ARG (exp, 0);
35417 arg1 = CALL_EXPR_ARG (exp, 1);
35418 op0 = expand_normal (arg0);
35419 op1 = expand_normal (arg1);
35421 if (!address_operand (op0, VOIDmode))
35423 op0 = convert_memory_address (Pmode, op0);
35424 op0 = copy_addr_to_reg (op0);
35426 op0 = gen_rtx_MEM (BLKmode, op0);
35428 op1 = force_reg (DImode, op1);
35430 if (TARGET_64BIT)
35432 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
35433 NULL, 1, OPTAB_DIRECT);
35434 switch (fcode)
35436 case IX86_BUILTIN_XSAVE:
35437 icode = CODE_FOR_xsave_rex64;
35438 break;
35439 case IX86_BUILTIN_XRSTOR:
35440 icode = CODE_FOR_xrstor_rex64;
35441 break;
35442 case IX86_BUILTIN_XSAVE64:
35443 icode = CODE_FOR_xsave64;
35444 break;
35445 case IX86_BUILTIN_XRSTOR64:
35446 icode = CODE_FOR_xrstor64;
35447 break;
35448 case IX86_BUILTIN_XSAVEOPT:
35449 icode = CODE_FOR_xsaveopt_rex64;
35450 break;
35451 case IX86_BUILTIN_XSAVEOPT64:
35452 icode = CODE_FOR_xsaveopt64;
35453 break;
35454 case IX86_BUILTIN_XSAVES:
35455 icode = CODE_FOR_xsaves_rex64;
35456 break;
35457 case IX86_BUILTIN_XRSTORS:
35458 icode = CODE_FOR_xrstors_rex64;
35459 break;
35460 case IX86_BUILTIN_XSAVES64:
35461 icode = CODE_FOR_xsaves64;
35462 break;
35463 case IX86_BUILTIN_XRSTORS64:
35464 icode = CODE_FOR_xrstors64;
35465 break;
35466 case IX86_BUILTIN_XSAVEC:
35467 icode = CODE_FOR_xsavec_rex64;
35468 break;
35469 case IX86_BUILTIN_XSAVEC64:
35470 icode = CODE_FOR_xsavec64;
35471 break;
35472 default:
35473 gcc_unreachable ();
35476 op2 = gen_lowpart (SImode, op2);
35477 op1 = gen_lowpart (SImode, op1);
35478 pat = GEN_FCN (icode) (op0, op1, op2);
35480 else
35482 switch (fcode)
35484 case IX86_BUILTIN_XSAVE:
35485 icode = CODE_FOR_xsave;
35486 break;
35487 case IX86_BUILTIN_XRSTOR:
35488 icode = CODE_FOR_xrstor;
35489 break;
35490 case IX86_BUILTIN_XSAVEOPT:
35491 icode = CODE_FOR_xsaveopt;
35492 break;
35493 case IX86_BUILTIN_XSAVES:
35494 icode = CODE_FOR_xsaves;
35495 break;
35496 case IX86_BUILTIN_XRSTORS:
35497 icode = CODE_FOR_xrstors;
35498 break;
35499 case IX86_BUILTIN_XSAVEC:
35500 icode = CODE_FOR_xsavec;
35501 break;
35502 default:
35503 gcc_unreachable ();
35505 pat = GEN_FCN (icode) (op0, op1);
35508 if (pat)
35509 emit_insn (pat);
35510 return 0;
35512 case IX86_BUILTIN_LLWPCB:
35513 arg0 = CALL_EXPR_ARG (exp, 0);
35514 op0 = expand_normal (arg0);
35515 icode = CODE_FOR_lwp_llwpcb;
35516 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35517 op0 = ix86_zero_extend_to_Pmode (op0);
35518 emit_insn (gen_lwp_llwpcb (op0));
35519 return 0;
35521 case IX86_BUILTIN_SLWPCB:
35522 icode = CODE_FOR_lwp_slwpcb;
35523 if (!target
35524 || !insn_data[icode].operand[0].predicate (target, Pmode))
35525 target = gen_reg_rtx (Pmode);
35526 emit_insn (gen_lwp_slwpcb (target));
35527 return target;
35529 case IX86_BUILTIN_BEXTRI32:
35530 case IX86_BUILTIN_BEXTRI64:
35531 arg0 = CALL_EXPR_ARG (exp, 0);
35532 arg1 = CALL_EXPR_ARG (exp, 1);
35533 op0 = expand_normal (arg0);
35534 op1 = expand_normal (arg1);
35535 icode = (fcode == IX86_BUILTIN_BEXTRI32
35536 ? CODE_FOR_tbm_bextri_si
35537 : CODE_FOR_tbm_bextri_di);
35538 if (!CONST_INT_P (op1))
35540 error ("the last argument must be an immediate");
35541 return const0_rtx;
35543 else
35545 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
35546 unsigned char lsb_index = INTVAL (op1) & 0xFF;
35547 op1 = GEN_INT (length);
35548 op2 = GEN_INT (lsb_index);
35549 pat = GEN_FCN (icode) (target, op0, op1, op2);
35550 if (pat)
35551 emit_insn (pat);
35552 return target;
35555 case IX86_BUILTIN_RDRAND16_STEP:
35556 icode = CODE_FOR_rdrandhi_1;
35557 mode0 = HImode;
35558 goto rdrand_step;
35560 case IX86_BUILTIN_RDRAND32_STEP:
35561 icode = CODE_FOR_rdrandsi_1;
35562 mode0 = SImode;
35563 goto rdrand_step;
35565 case IX86_BUILTIN_RDRAND64_STEP:
35566 icode = CODE_FOR_rdranddi_1;
35567 mode0 = DImode;
35569 rdrand_step:
35570 op0 = gen_reg_rtx (mode0);
35571 emit_insn (GEN_FCN (icode) (op0));
35573 arg0 = CALL_EXPR_ARG (exp, 0);
35574 op1 = expand_normal (arg0);
35575 if (!address_operand (op1, VOIDmode))
35577 op1 = convert_memory_address (Pmode, op1);
35578 op1 = copy_addr_to_reg (op1);
35580 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
35582 op1 = gen_reg_rtx (SImode);
35583 emit_move_insn (op1, CONST1_RTX (SImode));
35585 /* Emit SImode conditional move. */
35586 if (mode0 == HImode)
35588 op2 = gen_reg_rtx (SImode);
35589 emit_insn (gen_zero_extendhisi2 (op2, op0));
35591 else if (mode0 == SImode)
35592 op2 = op0;
35593 else
35594 op2 = gen_rtx_SUBREG (SImode, op0, 0);
35596 if (target == 0
35597 || !register_operand (target, SImode))
35598 target = gen_reg_rtx (SImode);
35600 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
35601 const0_rtx);
35602 emit_insn (gen_rtx_SET (VOIDmode, target,
35603 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
35604 return target;
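/* The conditional move above appears to rely on the architectural
   behaviour of RDRAND: on failure the instruction clears CF and zeroes
   its destination, so selecting the (then zero) destination when the
   carry is clear, and the constant 1 otherwise, produces the 0/1
   success value of the _rdrand*_step intrinsics without a branch.  */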
35606 case IX86_BUILTIN_RDSEED16_STEP:
35607 icode = CODE_FOR_rdseedhi_1;
35608 mode0 = HImode;
35609 goto rdseed_step;
35611 case IX86_BUILTIN_RDSEED32_STEP:
35612 icode = CODE_FOR_rdseedsi_1;
35613 mode0 = SImode;
35614 goto rdseed_step;
35616 case IX86_BUILTIN_RDSEED64_STEP:
35617 icode = CODE_FOR_rdseeddi_1;
35618 mode0 = DImode;
35620 rdseed_step:
35621 op0 = gen_reg_rtx (mode0);
35622 emit_insn (GEN_FCN (icode) (op0));
35624 arg0 = CALL_EXPR_ARG (exp, 0);
35625 op1 = expand_normal (arg0);
35626 if (!address_operand (op1, VOIDmode))
35628 op1 = convert_memory_address (Pmode, op1);
35629 op1 = copy_addr_to_reg (op1);
35631 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
35633 op2 = gen_reg_rtx (QImode);
35635 pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
35636 const0_rtx);
35637 emit_insn (gen_rtx_SET (VOIDmode, op2, pat));
35639 if (target == 0
35640 || !register_operand (target, SImode))
35641 target = gen_reg_rtx (SImode);
35643 emit_insn (gen_zero_extendqisi2 (target, op2));
35644 return target;
35646 case IX86_BUILTIN_ADDCARRYX32:
35647 icode = TARGET_ADX ? CODE_FOR_adcxsi3 : CODE_FOR_addsi3_carry;
35648 mode0 = SImode;
35649 goto addcarryx;
35651 case IX86_BUILTIN_ADDCARRYX64:
35652 icode = TARGET_ADX ? CODE_FOR_adcxdi3 : CODE_FOR_adddi3_carry;
35653 mode0 = DImode;
35655 addcarryx:
35656 arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */
35657 arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */
35658 arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */
35659 arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */
35661 op0 = gen_reg_rtx (QImode);
35663 /* Generate CF from input operand. */
35664 op1 = expand_normal (arg0);
35665 op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
35666 emit_insn (gen_addqi3_cc (op0, op1, constm1_rtx));
35668 /* Generate an ADCX (or plain ADC) instruction to compute X + Y + CF.  */
35669 op2 = expand_normal (arg1);
35670 op3 = expand_normal (arg2);
35672 if (!REG_P (op2))
35673 op2 = copy_to_mode_reg (mode0, op2);
35674 if (!REG_P (op3))
35675 op3 = copy_to_mode_reg (mode0, op3);
35677 op0 = gen_reg_rtx (mode0);
35679 op4 = gen_rtx_REG (CCCmode, FLAGS_REG);
35680 pat = gen_rtx_LTU (VOIDmode, op4, const0_rtx);
35681 emit_insn (GEN_FCN (icode) (op0, op2, op3, op4, pat));
35683 /* Store the result. */
35684 op4 = expand_normal (arg3);
35685 if (!address_operand (op4, VOIDmode))
35687 op4 = convert_memory_address (Pmode, op4);
35688 op4 = copy_addr_to_reg (op4);
35690 emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
35692 /* Return current CF value. */
35693 if (target == 0)
35694 target = gen_reg_rtx (QImode);
35696 PUT_MODE (pat, QImode);
35697 emit_insn (gen_rtx_SET (VOIDmode, target, pat));
35698 return target;
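/* Sketch of the sequence above for an _addcarryx_u32-style intrinsic
   (assuming the usual adxintrin.h mapping): adding constm1 (0xff in
   QImode) to the truncated c_in sets CF exactly when c_in was non-zero,
   the adcx/adc pattern then computes src1 + src2 + CF, the sum is
   stored through *sum_out, and the LTU test of the resulting carry flag
   is returned as the new carry-out byte.  */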
35700 case IX86_BUILTIN_READ_FLAGS:
35701 emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG)));
35703 if (optimize
35704 || target == NULL_RTX
35705 || !nonimmediate_operand (target, word_mode)
35706 || GET_MODE (target) != word_mode)
35707 target = gen_reg_rtx (word_mode);
35709 emit_insn (gen_pop (target));
35710 return target;
35712 case IX86_BUILTIN_WRITE_FLAGS:
35714 arg0 = CALL_EXPR_ARG (exp, 0);
35715 op0 = expand_normal (arg0);
35716 if (!general_no_elim_operand (op0, word_mode))
35717 op0 = copy_to_mode_reg (word_mode, op0);
35719 emit_insn (gen_push (op0));
35720 emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG)));
35721 return 0;
35723 case IX86_BUILTIN_KORTESTC16:
35724 icode = CODE_FOR_kortestchi;
35725 mode0 = HImode;
35726 mode1 = CCCmode;
35727 goto kortest;
35729 case IX86_BUILTIN_KORTESTZ16:
35730 icode = CODE_FOR_kortestzhi;
35731 mode0 = HImode;
35732 mode1 = CCZmode;
35734 kortest:
35735 arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */
35736 arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. */
35737 op0 = expand_normal (arg0);
35738 op1 = expand_normal (arg1);
35740 op0 = copy_to_reg (op0);
35741 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
35742 op1 = copy_to_reg (op1);
35743 op1 = simplify_gen_subreg (mode0, op1, GET_MODE (op1), 0);
35745 target = gen_reg_rtx (QImode);
35746 emit_insn (gen_rtx_SET (mode0, target, const0_rtx));
35748 /* Emit kortest. */
35749 emit_insn (GEN_FCN (icode) (op0, op1));
35750 /* And use setcc to return result from flags. */
35751 ix86_expand_setcc (target, EQ,
35752 gen_rtx_REG (mode1, FLAGS_REG), const0_rtx);
35753 return target;
35755 case IX86_BUILTIN_GATHERSIV2DF:
35756 icode = CODE_FOR_avx2_gathersiv2df;
35757 goto gather_gen;
35758 case IX86_BUILTIN_GATHERSIV4DF:
35759 icode = CODE_FOR_avx2_gathersiv4df;
35760 goto gather_gen;
35761 case IX86_BUILTIN_GATHERDIV2DF:
35762 icode = CODE_FOR_avx2_gatherdiv2df;
35763 goto gather_gen;
35764 case IX86_BUILTIN_GATHERDIV4DF:
35765 icode = CODE_FOR_avx2_gatherdiv4df;
35766 goto gather_gen;
35767 case IX86_BUILTIN_GATHERSIV4SF:
35768 icode = CODE_FOR_avx2_gathersiv4sf;
35769 goto gather_gen;
35770 case IX86_BUILTIN_GATHERSIV8SF:
35771 icode = CODE_FOR_avx2_gathersiv8sf;
35772 goto gather_gen;
35773 case IX86_BUILTIN_GATHERDIV4SF:
35774 icode = CODE_FOR_avx2_gatherdiv4sf;
35775 goto gather_gen;
35776 case IX86_BUILTIN_GATHERDIV8SF:
35777 icode = CODE_FOR_avx2_gatherdiv8sf;
35778 goto gather_gen;
35779 case IX86_BUILTIN_GATHERSIV2DI:
35780 icode = CODE_FOR_avx2_gathersiv2di;
35781 goto gather_gen;
35782 case IX86_BUILTIN_GATHERSIV4DI:
35783 icode = CODE_FOR_avx2_gathersiv4di;
35784 goto gather_gen;
35785 case IX86_BUILTIN_GATHERDIV2DI:
35786 icode = CODE_FOR_avx2_gatherdiv2di;
35787 goto gather_gen;
35788 case IX86_BUILTIN_GATHERDIV4DI:
35789 icode = CODE_FOR_avx2_gatherdiv4di;
35790 goto gather_gen;
35791 case IX86_BUILTIN_GATHERSIV4SI:
35792 icode = CODE_FOR_avx2_gathersiv4si;
35793 goto gather_gen;
35794 case IX86_BUILTIN_GATHERSIV8SI:
35795 icode = CODE_FOR_avx2_gathersiv8si;
35796 goto gather_gen;
35797 case IX86_BUILTIN_GATHERDIV4SI:
35798 icode = CODE_FOR_avx2_gatherdiv4si;
35799 goto gather_gen;
35800 case IX86_BUILTIN_GATHERDIV8SI:
35801 icode = CODE_FOR_avx2_gatherdiv8si;
35802 goto gather_gen;
35803 case IX86_BUILTIN_GATHERALTSIV4DF:
35804 icode = CODE_FOR_avx2_gathersiv4df;
35805 goto gather_gen;
35806 case IX86_BUILTIN_GATHERALTDIV8SF:
35807 icode = CODE_FOR_avx2_gatherdiv8sf;
35808 goto gather_gen;
35809 case IX86_BUILTIN_GATHERALTSIV4DI:
35810 icode = CODE_FOR_avx2_gathersiv4di;
35811 goto gather_gen;
35812 case IX86_BUILTIN_GATHERALTDIV8SI:
35813 icode = CODE_FOR_avx2_gatherdiv8si;
35814 goto gather_gen;
35815 case IX86_BUILTIN_GATHER3SIV16SF:
35816 icode = CODE_FOR_avx512f_gathersiv16sf;
35817 goto gather_gen;
35818 case IX86_BUILTIN_GATHER3SIV8DF:
35819 icode = CODE_FOR_avx512f_gathersiv8df;
35820 goto gather_gen;
35821 case IX86_BUILTIN_GATHER3DIV16SF:
35822 icode = CODE_FOR_avx512f_gatherdiv16sf;
35823 goto gather_gen;
35824 case IX86_BUILTIN_GATHER3DIV8DF:
35825 icode = CODE_FOR_avx512f_gatherdiv8df;
35826 goto gather_gen;
35827 case IX86_BUILTIN_GATHER3SIV16SI:
35828 icode = CODE_FOR_avx512f_gathersiv16si;
35829 goto gather_gen;
35830 case IX86_BUILTIN_GATHER3SIV8DI:
35831 icode = CODE_FOR_avx512f_gathersiv8di;
35832 goto gather_gen;
35833 case IX86_BUILTIN_GATHER3DIV16SI:
35834 icode = CODE_FOR_avx512f_gatherdiv16si;
35835 goto gather_gen;
35836 case IX86_BUILTIN_GATHER3DIV8DI:
35837 icode = CODE_FOR_avx512f_gatherdiv8di;
35838 goto gather_gen;
35839 case IX86_BUILTIN_GATHER3ALTSIV8DF:
35840 icode = CODE_FOR_avx512f_gathersiv8df;
35841 goto gather_gen;
35842 case IX86_BUILTIN_GATHER3ALTDIV16SF:
35843 icode = CODE_FOR_avx512f_gatherdiv16sf;
35844 goto gather_gen;
35845 case IX86_BUILTIN_GATHER3ALTSIV8DI:
35846 icode = CODE_FOR_avx512f_gathersiv8di;
35847 goto gather_gen;
35848 case IX86_BUILTIN_GATHER3ALTDIV16SI:
35849 icode = CODE_FOR_avx512f_gatherdiv16si;
35850 goto gather_gen;
35851 case IX86_BUILTIN_SCATTERSIV16SF:
35852 icode = CODE_FOR_avx512f_scattersiv16sf;
35853 goto scatter_gen;
35854 case IX86_BUILTIN_SCATTERSIV8DF:
35855 icode = CODE_FOR_avx512f_scattersiv8df;
35856 goto scatter_gen;
35857 case IX86_BUILTIN_SCATTERDIV16SF:
35858 icode = CODE_FOR_avx512f_scatterdiv16sf;
35859 goto scatter_gen;
35860 case IX86_BUILTIN_SCATTERDIV8DF:
35861 icode = CODE_FOR_avx512f_scatterdiv8df;
35862 goto scatter_gen;
35863 case IX86_BUILTIN_SCATTERSIV16SI:
35864 icode = CODE_FOR_avx512f_scattersiv16si;
35865 goto scatter_gen;
35866 case IX86_BUILTIN_SCATTERSIV8DI:
35867 icode = CODE_FOR_avx512f_scattersiv8di;
35868 goto scatter_gen;
35869 case IX86_BUILTIN_SCATTERDIV16SI:
35870 icode = CODE_FOR_avx512f_scatterdiv16si;
35871 goto scatter_gen;
35872 case IX86_BUILTIN_SCATTERDIV8DI:
35873 icode = CODE_FOR_avx512f_scatterdiv8di;
35874 goto scatter_gen;
35876 case IX86_BUILTIN_GATHERPFDPD:
35877 icode = CODE_FOR_avx512pf_gatherpfv8sidf;
35878 goto vec_prefetch_gen;
35879 case IX86_BUILTIN_GATHERPFDPS:
35880 icode = CODE_FOR_avx512pf_gatherpfv16sisf;
35881 goto vec_prefetch_gen;
35882 case IX86_BUILTIN_GATHERPFQPD:
35883 icode = CODE_FOR_avx512pf_gatherpfv8didf;
35884 goto vec_prefetch_gen;
35885 case IX86_BUILTIN_GATHERPFQPS:
35886 icode = CODE_FOR_avx512pf_gatherpfv8disf;
35887 goto vec_prefetch_gen;
35888 case IX86_BUILTIN_SCATTERPFDPD:
35889 icode = CODE_FOR_avx512pf_scatterpfv8sidf;
35890 goto vec_prefetch_gen;
35891 case IX86_BUILTIN_SCATTERPFDPS:
35892 icode = CODE_FOR_avx512pf_scatterpfv16sisf;
35893 goto vec_prefetch_gen;
35894 case IX86_BUILTIN_SCATTERPFQPD:
35895 icode = CODE_FOR_avx512pf_scatterpfv8didf;
35896 goto vec_prefetch_gen;
35897 case IX86_BUILTIN_SCATTERPFQPS:
35898 icode = CODE_FOR_avx512pf_scatterpfv8disf;
35899 goto vec_prefetch_gen;
35901 gather_gen:
35902 rtx half;
35903 rtx (*gen) (rtx, rtx);
35905 arg0 = CALL_EXPR_ARG (exp, 0);
35906 arg1 = CALL_EXPR_ARG (exp, 1);
35907 arg2 = CALL_EXPR_ARG (exp, 2);
35908 arg3 = CALL_EXPR_ARG (exp, 3);
35909 arg4 = CALL_EXPR_ARG (exp, 4);
35910 op0 = expand_normal (arg0);
35911 op1 = expand_normal (arg1);
35912 op2 = expand_normal (arg2);
35913 op3 = expand_normal (arg3);
35914 op4 = expand_normal (arg4);
35915 /* Note the arg order is different from the operand order. */
35916 mode0 = insn_data[icode].operand[1].mode;
35917 mode2 = insn_data[icode].operand[3].mode;
35918 mode3 = insn_data[icode].operand[4].mode;
35919 mode4 = insn_data[icode].operand[5].mode;
35921 if (target == NULL_RTX
35922 || GET_MODE (target) != insn_data[icode].operand[0].mode
35923 || !insn_data[icode].operand[0].predicate (target,
35924 GET_MODE (target)))
35925 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
35926 else
35927 subtarget = target;
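/* The "alt" gather variants mix index and data vectors with different
element counts, so only the low half of the wider operand is used: the
ALTSIV cases halve the SImode index vector, while the ALTDIV cases halve
the source and mask vectors.  */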
35929 switch (fcode)
35931 case IX86_BUILTIN_GATHER3ALTSIV8DF:
35932 case IX86_BUILTIN_GATHER3ALTSIV8DI:
35933 half = gen_reg_rtx (V8SImode);
35934 if (!nonimmediate_operand (op2, V16SImode))
35935 op2 = copy_to_mode_reg (V16SImode, op2);
35936 emit_insn (gen_vec_extract_lo_v16si (half, op2));
35937 op2 = half;
35938 break;
35939 case IX86_BUILTIN_GATHERALTSIV4DF:
35940 case IX86_BUILTIN_GATHERALTSIV4DI:
35941 half = gen_reg_rtx (V4SImode);
35942 if (!nonimmediate_operand (op2, V8SImode))
35943 op2 = copy_to_mode_reg (V8SImode, op2);
35944 emit_insn (gen_vec_extract_lo_v8si (half, op2));
35945 op2 = half;
35946 break;
35947 case IX86_BUILTIN_GATHER3ALTDIV16SF:
35948 case IX86_BUILTIN_GATHER3ALTDIV16SI:
35949 half = gen_reg_rtx (mode0);
35950 if (mode0 == V8SFmode)
35951 gen = gen_vec_extract_lo_v16sf;
35952 else
35953 gen = gen_vec_extract_lo_v16si;
35954 if (!nonimmediate_operand (op0, GET_MODE (op0)))
35955 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
35956 emit_insn (gen (half, op0));
35957 op0 = half;
35958 if (GET_MODE (op3) != VOIDmode)
35960 if (!nonimmediate_operand (op3, GET_MODE (op3)))
35961 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
35962 emit_insn (gen (half, op3));
35963 op3 = half;
35965 break;
35966 case IX86_BUILTIN_GATHERALTDIV8SF:
35967 case IX86_BUILTIN_GATHERALTDIV8SI:
35968 half = gen_reg_rtx (mode0);
35969 if (mode0 == V4SFmode)
35970 gen = gen_vec_extract_lo_v8sf;
35971 else
35972 gen = gen_vec_extract_lo_v8si;
35973 if (!nonimmediate_operand (op0, GET_MODE (op0)))
35974 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
35975 emit_insn (gen (half, op0));
35976 op0 = half;
35977 if (GET_MODE (op3) != VOIDmode)
35979 if (!nonimmediate_operand (op3, GET_MODE (op3)))
35980 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
35981 emit_insn (gen (half, op3));
35982 op3 = half;
35984 break;
35985 default:
35986 break;
35989 /* Force the memory operand to use only a base register here.  We
35990 don't want to do this for the memory operands of other builtin
35991 functions. */
35992 op1 = ix86_zero_extend_to_Pmode (op1);
35994 if (!insn_data[icode].operand[1].predicate (op0, mode0))
35995 op0 = copy_to_mode_reg (mode0, op0);
35996 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
35997 op1 = copy_to_mode_reg (Pmode, op1);
35998 if (!insn_data[icode].operand[3].predicate (op2, mode2))
35999 op2 = copy_to_mode_reg (mode2, op2);
36000 if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode)
36002 if (!insn_data[icode].operand[4].predicate (op3, mode3))
36003 op3 = copy_to_mode_reg (mode3, op3);
36005 else
36007 op3 = copy_to_reg (op3);
36008 op3 = simplify_gen_subreg (mode3, op3, GET_MODE (op3), 0);
36010 if (!insn_data[icode].operand[5].predicate (op4, mode4))
36012 error ("the last argument must be scale 1, 2, 4, 8");
36013 return const0_rtx;
36016 /* Optimize.  If the mask is known to have the high bit of each
36017 element set, replace op0 with pc_rtx to signal that the instruction
36018 overwrites the whole destination and doesn't use its
36019 previous contents. */
36020 if (optimize)
36022 if (TREE_CODE (arg3) == INTEGER_CST)
36024 if (integer_all_onesp (arg3))
36025 op0 = pc_rtx;
36027 else if (TREE_CODE (arg3) == VECTOR_CST)
36029 unsigned int negative = 0;
36030 for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
36032 tree cst = VECTOR_CST_ELT (arg3, i);
36033 if (TREE_CODE (cst) == INTEGER_CST
36034 && tree_int_cst_sign_bit (cst))
36035 negative++;
36036 else if (TREE_CODE (cst) == REAL_CST
36037 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
36038 negative++;
36040 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
36041 op0 = pc_rtx;
36043 else if (TREE_CODE (arg3) == SSA_NAME
36044 && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE)
36046 /* Recognize also when mask is like:
36047 __v2df src = _mm_setzero_pd ();
36048 __v2df mask = _mm_cmpeq_pd (src, src);
36050 __v8sf src = _mm256_setzero_ps ();
36051 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
36052 as that is a cheaper way to load all ones into
36053 a register than having to load a constant from
36054 memory. */
36055 gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
36056 if (is_gimple_call (def_stmt))
36058 tree fndecl = gimple_call_fndecl (def_stmt);
36059 if (fndecl
36060 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
36061 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
36063 case IX86_BUILTIN_CMPPD:
36064 case IX86_BUILTIN_CMPPS:
36065 case IX86_BUILTIN_CMPPD256:
36066 case IX86_BUILTIN_CMPPS256:
36067 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
36068 break;
36069 /* FALLTHRU */
36070 case IX86_BUILTIN_CMPEQPD:
36071 case IX86_BUILTIN_CMPEQPS:
36072 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
36073 && initializer_zerop (gimple_call_arg (def_stmt,
36074 1)))
36075 op0 = pc_rtx;
36076 break;
36077 default:
36078 break;
36084 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
36085 if (! pat)
36086 return const0_rtx;
36087 emit_insn (pat);
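/* For the gathers with DImode indices handled below, the gathered
elements occupy only the low half of the instruction's destination, so
extract that half of subtarget into the mode the builtin returns.  */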
36089 switch (fcode)
36091 case IX86_BUILTIN_GATHER3DIV16SF:
36092 if (target == NULL_RTX)
36093 target = gen_reg_rtx (V8SFmode);
36094 emit_insn (gen_vec_extract_lo_v16sf (target, subtarget));
36095 break;
36096 case IX86_BUILTIN_GATHER3DIV16SI:
36097 if (target == NULL_RTX)
36098 target = gen_reg_rtx (V8SImode);
36099 emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
36100 break;
36101 case IX86_BUILTIN_GATHERDIV8SF:
36102 if (target == NULL_RTX)
36103 target = gen_reg_rtx (V4SFmode);
36104 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
36105 break;
36106 case IX86_BUILTIN_GATHERDIV8SI:
36107 if (target == NULL_RTX)
36108 target = gen_reg_rtx (V4SImode);
36109 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
36110 break;
36111 default:
36112 target = subtarget;
36113 break;
36115 return target;
36117 scatter_gen:
36118 arg0 = CALL_EXPR_ARG (exp, 0);
36119 arg1 = CALL_EXPR_ARG (exp, 1);
36120 arg2 = CALL_EXPR_ARG (exp, 2);
36121 arg3 = CALL_EXPR_ARG (exp, 3);
36122 arg4 = CALL_EXPR_ARG (exp, 4);
36123 op0 = expand_normal (arg0);
36124 op1 = expand_normal (arg1);
36125 op2 = expand_normal (arg2);
36126 op3 = expand_normal (arg3);
36127 op4 = expand_normal (arg4);
36128 mode1 = insn_data[icode].operand[1].mode;
36129 mode2 = insn_data[icode].operand[2].mode;
36130 mode3 = insn_data[icode].operand[3].mode;
36131 mode4 = insn_data[icode].operand[4].mode;
36133 /* Force the memory operand to use only a base register here.  We
36134 don't want to do this for the memory operands of other builtin
36135 functions. */
36136 op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
36138 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
36139 op0 = copy_to_mode_reg (Pmode, op0);
36141 if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
36143 if (!insn_data[icode].operand[1].predicate (op1, mode1))
36144 op1 = copy_to_mode_reg (mode1, op1);
36146 else
36148 op1 = copy_to_reg (op1);
36149 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
36152 if (!insn_data[icode].operand[2].predicate (op2, mode2))
36153 op2 = copy_to_mode_reg (mode2, op2);
36155 if (!insn_data[icode].operand[3].predicate (op3, mode3))
36156 op3 = copy_to_mode_reg (mode3, op3);
36158 if (!insn_data[icode].operand[4].predicate (op4, mode4))
36160 error ("the last argument must be scale 1, 2, 4, 8");
36161 return const0_rtx;
36164 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
36165 if (! pat)
36166 return const0_rtx;
36168 emit_insn (pat);
36169 return 0;
36171 vec_prefetch_gen:
36172 arg0 = CALL_EXPR_ARG (exp, 0);
36173 arg1 = CALL_EXPR_ARG (exp, 1);
36174 arg2 = CALL_EXPR_ARG (exp, 2);
36175 arg3 = CALL_EXPR_ARG (exp, 3);
36176 arg4 = CALL_EXPR_ARG (exp, 4);
36177 op0 = expand_normal (arg0);
36178 op1 = expand_normal (arg1);
36179 op2 = expand_normal (arg2);
36180 op3 = expand_normal (arg3);
36181 op4 = expand_normal (arg4);
36182 mode0 = insn_data[icode].operand[0].mode;
36183 mode1 = insn_data[icode].operand[1].mode;
36184 mode3 = insn_data[icode].operand[3].mode;
36185 mode4 = insn_data[icode].operand[4].mode;
36187 if (GET_MODE (op0) == mode0
36188 || (GET_MODE (op0) == VOIDmode && op0 != constm1_rtx))
36190 if (!insn_data[icode].operand[0].predicate (op0, mode0))
36191 op0 = copy_to_mode_reg (mode0, op0);
36193 else if (op0 != constm1_rtx)
36195 op0 = copy_to_reg (op0);
36196 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
36199 if (!insn_data[icode].operand[1].predicate (op1, mode1))
36200 op1 = copy_to_mode_reg (mode1, op1);
36202 /* Force the memory operand to use only a base register here.  We
36203 don't want to do this for the memory operands of other builtin
36204 functions. */
36205 op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1));
36207 if (!insn_data[icode].operand[2].predicate (op2, Pmode))
36208 op2 = copy_to_mode_reg (Pmode, op2);
36210 if (!insn_data[icode].operand[3].predicate (op3, mode3))
36212 error ("the fourth argument must be scale 1, 2, 4, 8");
36213 return const0_rtx;
36216 if (!insn_data[icode].operand[4].predicate (op4, mode4))
36218 error ("incorrect hint operand");
36219 return const0_rtx;
36222 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
36223 if (! pat)
36224 return const0_rtx;
36226 emit_insn (pat);
36228 return 0;
36230 case IX86_BUILTIN_XABORT:
36231 icode = CODE_FOR_xabort;
36232 arg0 = CALL_EXPR_ARG (exp, 0);
36233 op0 = expand_normal (arg0);
36234 mode0 = insn_data[icode].operand[0].mode;
36235 if (!insn_data[icode].operand[0].predicate (op0, mode0))
36237 error ("the xabort's argument must be an 8-bit immediate");
36238 return const0_rtx;
36240 emit_insn (gen_xabort (op0));
36241 return 0;
36243 default:
36244 break;
36247 for (i = 0, d = bdesc_special_args;
36248 i < ARRAY_SIZE (bdesc_special_args);
36249 i++, d++)
36250 if (d->code == fcode)
36251 return ix86_expand_special_args_builtin (d, exp, target);
36253 for (i = 0, d = bdesc_args;
36254 i < ARRAY_SIZE (bdesc_args);
36255 i++, d++)
36256 if (d->code == fcode)
36257 switch (fcode)
36259 case IX86_BUILTIN_FABSQ:
36260 case IX86_BUILTIN_COPYSIGNQ:
36261 if (!TARGET_SSE)
36262 /* Emit a normal call if SSE isn't available. */
36263 return expand_call (exp, target, ignore);
36264 default:
36265 return ix86_expand_args_builtin (d, exp, target);
36268 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
36269 if (d->code == fcode)
36270 return ix86_expand_sse_comi (d, exp, target);
36272 for (i = 0, d = bdesc_round_args; i < ARRAY_SIZE (bdesc_round_args); i++, d++)
36273 if (d->code == fcode)
36274 return ix86_expand_round_builtin (d, exp, target);
36276 for (i = 0, d = bdesc_pcmpestr;
36277 i < ARRAY_SIZE (bdesc_pcmpestr);
36278 i++, d++)
36279 if (d->code == fcode)
36280 return ix86_expand_sse_pcmpestr (d, exp, target);
36282 for (i = 0, d = bdesc_pcmpistr;
36283 i < ARRAY_SIZE (bdesc_pcmpistr);
36284 i++, d++)
36285 if (d->code == fcode)
36286 return ix86_expand_sse_pcmpistr (d, exp, target);
36288 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
36289 if (d->code == fcode)
36290 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
36291 (enum ix86_builtin_func_type)
36292 d->flag, d->comparison);
36294 gcc_unreachable ();
36297 /* This returns the target-specific builtin with code CODE if
36298 current_function_decl has visibility on this builtin, which is checked
36299 using isa flags. Returns NULL_TREE otherwise. */
36301 static tree ix86_get_builtin (enum ix86_builtins code)
36303 struct cl_target_option *opts;
36304 tree target_tree = NULL_TREE;
36306 /* Determine the isa flags of current_function_decl. */
36308 if (current_function_decl)
36309 target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
36311 if (target_tree == NULL)
36312 target_tree = target_option_default_node;
36314 opts = TREE_TARGET_OPTION (target_tree);
36316 if (ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
36317 return ix86_builtin_decl (code, true);
36318 else
36319 return NULL_TREE;
36322 /* Returns a function decl for a vectorized version of the builtin function
36323 with builtin function code FN and the result vector type TYPE, or NULL_TREE
36324 if it is not available. */
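/* For example, a two-lane DFmode sqrt maps to IX86_BUILTIN_SQRTPD and a
four-lane one to IX86_BUILTIN_SQRTPD256.  */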
36326 static tree
36327 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
36328 tree type_in)
36330 enum machine_mode in_mode, out_mode;
36331 int in_n, out_n;
36332 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
36334 if (TREE_CODE (type_out) != VECTOR_TYPE
36335 || TREE_CODE (type_in) != VECTOR_TYPE
36336 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
36337 return NULL_TREE;
36339 out_mode = TYPE_MODE (TREE_TYPE (type_out));
36340 out_n = TYPE_VECTOR_SUBPARTS (type_out);
36341 in_mode = TYPE_MODE (TREE_TYPE (type_in));
36342 in_n = TYPE_VECTOR_SUBPARTS (type_in);
36344 switch (fn)
36346 case BUILT_IN_SQRT:
36347 if (out_mode == DFmode && in_mode == DFmode)
36349 if (out_n == 2 && in_n == 2)
36350 return ix86_get_builtin (IX86_BUILTIN_SQRTPD);
36351 else if (out_n == 4 && in_n == 4)
36352 return ix86_get_builtin (IX86_BUILTIN_SQRTPD256);
36353 else if (out_n == 8 && in_n == 8)
36354 return ix86_get_builtin (IX86_BUILTIN_SQRTPD512);
36356 break;
36358 case BUILT_IN_EXP2F:
36359 if (out_mode == SFmode && in_mode == SFmode)
36361 if (out_n == 16 && in_n == 16)
36362 return ix86_get_builtin (IX86_BUILTIN_EXP2PS);
36364 break;
36366 case BUILT_IN_SQRTF:
36367 if (out_mode == SFmode && in_mode == SFmode)
36369 if (out_n == 4 && in_n == 4)
36370 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR);
36371 else if (out_n == 8 && in_n == 8)
36372 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR256);
36373 else if (out_n == 16 && in_n == 16)
36374 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR512);
36376 break;
36378 case BUILT_IN_IFLOOR:
36379 case BUILT_IN_LFLOOR:
36380 case BUILT_IN_LLFLOOR:
36381 /* The round insn does not trap on denormals. */
36382 if (flag_trapping_math || !TARGET_ROUND)
36383 break;
36385 if (out_mode == SImode && in_mode == DFmode)
36387 if (out_n == 4 && in_n == 2)
36388 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX);
36389 else if (out_n == 8 && in_n == 4)
36390 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256);
36391 else if (out_n == 16 && in_n == 8)
36392 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512);
36394 break;
36396 case BUILT_IN_IFLOORF:
36397 case BUILT_IN_LFLOORF:
36398 case BUILT_IN_LLFLOORF:
36399 /* The round insn does not trap on denormals. */
36400 if (flag_trapping_math || !TARGET_ROUND)
36401 break;
36403 if (out_mode == SImode && in_mode == SFmode)
36405 if (out_n == 4 && in_n == 4)
36406 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
36407 else if (out_n == 8 && in_n == 8)
36408 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
36410 break;
36412 case BUILT_IN_ICEIL:
36413 case BUILT_IN_LCEIL:
36414 case BUILT_IN_LLCEIL:
36415 /* The round insn does not trap on denormals. */
36416 if (flag_trapping_math || !TARGET_ROUND)
36417 break;
36419 if (out_mode == SImode && in_mode == DFmode)
36421 if (out_n == 4 && in_n == 2)
36422 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX);
36423 else if (out_n == 8 && in_n == 4)
36424 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256);
36425 else if (out_n == 16 && in_n == 8)
36426 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512);
36428 break;
36430 case BUILT_IN_ICEILF:
36431 case BUILT_IN_LCEILF:
36432 case BUILT_IN_LLCEILF:
36433 /* The round insn does not trap on denormals. */
36434 if (flag_trapping_math || !TARGET_ROUND)
36435 break;
36437 if (out_mode == SImode && in_mode == SFmode)
36439 if (out_n == 4 && in_n == 4)
36440 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX);
36441 else if (out_n == 8 && in_n == 8)
36442 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
36444 break;
36446 case BUILT_IN_IRINT:
36447 case BUILT_IN_LRINT:
36448 case BUILT_IN_LLRINT:
36449 if (out_mode == SImode && in_mode == DFmode)
36451 if (out_n == 4 && in_n == 2)
36452 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
36453 else if (out_n == 8 && in_n == 4)
36454 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
36456 break;
36458 case BUILT_IN_IRINTF:
36459 case BUILT_IN_LRINTF:
36460 case BUILT_IN_LLRINTF:
36461 if (out_mode == SImode && in_mode == SFmode)
36463 if (out_n == 4 && in_n == 4)
36464 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ);
36465 else if (out_n == 8 && in_n == 8)
36466 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256);
36468 break;
36470 case BUILT_IN_IROUND:
36471 case BUILT_IN_LROUND:
36472 case BUILT_IN_LLROUND:
36473 /* The round insn does not trap on denormals. */
36474 if (flag_trapping_math || !TARGET_ROUND)
36475 break;
36477 if (out_mode == SImode && in_mode == DFmode)
36479 if (out_n == 4 && in_n == 2)
36480 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX);
36481 else if (out_n == 8 && in_n == 4)
36482 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256);
36483 else if (out_n == 16 && in_n == 8)
36484 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512);
36486 break;
36488 case BUILT_IN_IROUNDF:
36489 case BUILT_IN_LROUNDF:
36490 case BUILT_IN_LLROUNDF:
36491 /* The round insn does not trap on denormals. */
36492 if (flag_trapping_math || !TARGET_ROUND)
36493 break;
36495 if (out_mode == SImode && in_mode == SFmode)
36497 if (out_n == 4 && in_n == 4)
36498 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
36499 else if (out_n == 8 && in_n == 8)
36500 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
36502 break;
36504 case BUILT_IN_COPYSIGN:
36505 if (out_mode == DFmode && in_mode == DFmode)
36507 if (out_n == 2 && in_n == 2)
36508 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD);
36509 else if (out_n == 4 && in_n == 4)
36510 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD256);
36511 else if (out_n == 8 && in_n == 8)
36512 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD512);
36514 break;
36516 case BUILT_IN_COPYSIGNF:
36517 if (out_mode == SFmode && in_mode == SFmode)
36519 if (out_n == 4 && in_n == 4)
36520 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS);
36521 else if (out_n == 8 && in_n == 8)
36522 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS256);
36523 else if (out_n == 16 && in_n == 16)
36524 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS512);
36526 break;
36528 case BUILT_IN_FLOOR:
36529 /* The round insn does not trap on denormals. */
36530 if (flag_trapping_math || !TARGET_ROUND)
36531 break;
36533 if (out_mode == DFmode && in_mode == DFmode)
36535 if (out_n == 2 && in_n == 2)
36536 return ix86_get_builtin (IX86_BUILTIN_FLOORPD);
36537 else if (out_n == 4 && in_n == 4)
36538 return ix86_get_builtin (IX86_BUILTIN_FLOORPD256);
36540 break;
36542 case BUILT_IN_FLOORF:
36543 /* The round insn does not trap on denormals. */
36544 if (flag_trapping_math || !TARGET_ROUND)
36545 break;
36547 if (out_mode == SFmode && in_mode == SFmode)
36549 if (out_n == 4 && in_n == 4)
36550 return ix86_get_builtin (IX86_BUILTIN_FLOORPS);
36551 else if (out_n == 8 && in_n == 8)
36552 return ix86_get_builtin (IX86_BUILTIN_FLOORPS256);
36554 break;
36556 case BUILT_IN_CEIL:
36557 /* The round insn does not trap on denormals. */
36558 if (flag_trapping_math || !TARGET_ROUND)
36559 break;
36561 if (out_mode == DFmode && in_mode == DFmode)
36563 if (out_n == 2 && in_n == 2)
36564 return ix86_get_builtin (IX86_BUILTIN_CEILPD);
36565 else if (out_n == 4 && in_n == 4)
36566 return ix86_get_builtin (IX86_BUILTIN_CEILPD256);
36568 break;
36570 case BUILT_IN_CEILF:
36571 /* The round insn does not trap on denormals. */
36572 if (flag_trapping_math || !TARGET_ROUND)
36573 break;
36575 if (out_mode == SFmode && in_mode == SFmode)
36577 if (out_n == 4 && in_n == 4)
36578 return ix86_get_builtin (IX86_BUILTIN_CEILPS);
36579 else if (out_n == 8 && in_n == 8)
36580 return ix86_get_builtin (IX86_BUILTIN_CEILPS256);
36582 break;
36584 case BUILT_IN_TRUNC:
36585 /* The round insn does not trap on denormals. */
36586 if (flag_trapping_math || !TARGET_ROUND)
36587 break;
36589 if (out_mode == DFmode && in_mode == DFmode)
36591 if (out_n == 2 && in_n == 2)
36592 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD);
36593 else if (out_n == 4 && in_n == 4)
36594 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256);
36596 break;
36598 case BUILT_IN_TRUNCF:
36599 /* The round insn does not trap on denormals. */
36600 if (flag_trapping_math || !TARGET_ROUND)
36601 break;
36603 if (out_mode == SFmode && in_mode == SFmode)
36605 if (out_n == 4 && in_n == 4)
36606 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS);
36607 else if (out_n == 8 && in_n == 8)
36608 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256);
36610 break;
36612 case BUILT_IN_RINT:
36613 /* The round insn does not trap on denormals. */
36614 if (flag_trapping_math || !TARGET_ROUND)
36615 break;
36617 if (out_mode == DFmode && in_mode == DFmode)
36619 if (out_n == 2 && in_n == 2)
36620 return ix86_get_builtin (IX86_BUILTIN_RINTPD);
36621 else if (out_n == 4 && in_n == 4)
36622 return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
36624 break;
36626 case BUILT_IN_RINTF:
36627 /* The round insn does not trap on denormals. */
36628 if (flag_trapping_math || !TARGET_ROUND)
36629 break;
36631 if (out_mode == SFmode && in_mode == SFmode)
36633 if (out_n == 4 && in_n == 4)
36634 return ix86_get_builtin (IX86_BUILTIN_RINTPS);
36635 else if (out_n == 8 && in_n == 8)
36636 return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
36638 break;
36640 case BUILT_IN_ROUND:
36641 /* The round insn does not trap on denormals. */
36642 if (flag_trapping_math || !TARGET_ROUND)
36643 break;
36645 if (out_mode == DFmode && in_mode == DFmode)
36647 if (out_n == 2 && in_n == 2)
36648 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ);
36649 else if (out_n == 4 && in_n == 4)
36650 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ256);
36652 break;
36654 case BUILT_IN_ROUNDF:
36655 /* The round insn does not trap on denormals. */
36656 if (flag_trapping_math || !TARGET_ROUND)
36657 break;
36659 if (out_mode == SFmode && in_mode == SFmode)
36661 if (out_n == 4 && in_n == 4)
36662 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ);
36663 else if (out_n == 8 && in_n == 8)
36664 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ256);
36666 break;
36668 case BUILT_IN_FMA:
36669 if (out_mode == DFmode && in_mode == DFmode)
36671 if (out_n == 2 && in_n == 2)
36672 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD);
36673 if (out_n == 4 && in_n == 4)
36674 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256);
36676 break;
36678 case BUILT_IN_FMAF:
36679 if (out_mode == SFmode && in_mode == SFmode)
36681 if (out_n == 4 && in_n == 4)
36682 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS);
36683 if (out_n == 8 && in_n == 8)
36684 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256);
36686 break;
36688 default:
36689 break;
36692 /* Dispatch to a handler for a vectorization library. */
36693 if (ix86_veclib_handler)
36694 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
36695 type_in);
36697 return NULL_TREE;
36700 /* Handler for an SVML-style interface to
36701 a library with vectorized intrinsics. */
36703 static tree
36704 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
36706 char name[20];
36707 tree fntype, new_fndecl, args;
36708 unsigned arity;
36709 const char *bname;
36710 enum machine_mode el_mode, in_mode;
36711 int n, in_n;
36713 /* The SVML is suitable for unsafe math only. */
36714 if (!flag_unsafe_math_optimizations)
36715 return NULL_TREE;
36717 el_mode = TYPE_MODE (TREE_TYPE (type_out));
36718 n = TYPE_VECTOR_SUBPARTS (type_out);
36719 in_mode = TYPE_MODE (TREE_TYPE (type_in));
36720 in_n = TYPE_VECTOR_SUBPARTS (type_in);
36721 if (el_mode != in_mode
36722 || n != in_n)
36723 return NULL_TREE;
36725 switch (fn)
36727 case BUILT_IN_EXP:
36728 case BUILT_IN_LOG:
36729 case BUILT_IN_LOG10:
36730 case BUILT_IN_POW:
36731 case BUILT_IN_TANH:
36732 case BUILT_IN_TAN:
36733 case BUILT_IN_ATAN:
36734 case BUILT_IN_ATAN2:
36735 case BUILT_IN_ATANH:
36736 case BUILT_IN_CBRT:
36737 case BUILT_IN_SINH:
36738 case BUILT_IN_SIN:
36739 case BUILT_IN_ASINH:
36740 case BUILT_IN_ASIN:
36741 case BUILT_IN_COSH:
36742 case BUILT_IN_COS:
36743 case BUILT_IN_ACOSH:
36744 case BUILT_IN_ACOS:
36745 if (el_mode != DFmode || n != 2)
36746 return NULL_TREE;
36747 break;
36749 case BUILT_IN_EXPF:
36750 case BUILT_IN_LOGF:
36751 case BUILT_IN_LOG10F:
36752 case BUILT_IN_POWF:
36753 case BUILT_IN_TANHF:
36754 case BUILT_IN_TANF:
36755 case BUILT_IN_ATANF:
36756 case BUILT_IN_ATAN2F:
36757 case BUILT_IN_ATANHF:
36758 case BUILT_IN_CBRTF:
36759 case BUILT_IN_SINHF:
36760 case BUILT_IN_SINF:
36761 case BUILT_IN_ASINHF:
36762 case BUILT_IN_ASINF:
36763 case BUILT_IN_COSHF:
36764 case BUILT_IN_COSF:
36765 case BUILT_IN_ACOSHF:
36766 case BUILT_IN_ACOSF:
36767 if (el_mode != SFmode || n != 4)
36768 return NULL_TREE;
36769 break;
36771 default:
36772 return NULL_TREE;
36775 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
36777 if (fn == BUILT_IN_LOGF)
36778 strcpy (name, "vmlsLn4");
36779 else if (fn == BUILT_IN_LOG)
36780 strcpy (name, "vmldLn2");
36781 else if (n == 4)
36783 sprintf (name, "vmls%s", bname+10);
36784 name[strlen (name)-1] = '4';
36786 else
36787 sprintf (name, "vmld%s2", bname+10);
36789 /* Convert to uppercase. */
36790 name[4] &= ~0x20;
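/* The resulting name is e.g. "vmlsSin4" for __builtin_sinf and
"vmldSin2" for __builtin_sin.  */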
36792 arity = 0;
36793 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
36794 args;
36795 args = TREE_CHAIN (args))
36796 arity++;
36798 if (arity == 1)
36799 fntype = build_function_type_list (type_out, type_in, NULL);
36800 else
36801 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
36803 /* Build a function declaration for the vectorized function. */
36804 new_fndecl = build_decl (BUILTINS_LOCATION,
36805 FUNCTION_DECL, get_identifier (name), fntype);
36806 TREE_PUBLIC (new_fndecl) = 1;
36807 DECL_EXTERNAL (new_fndecl) = 1;
36808 DECL_IS_NOVOPS (new_fndecl) = 1;
36809 TREE_READONLY (new_fndecl) = 1;
36811 return new_fndecl;
36814 /* Handler for an ACML-style interface to
36815 a library with vectorized intrinsics. */
36817 static tree
36818 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
36820 char name[20] = "__vr.._";
36821 tree fntype, new_fndecl, args;
36822 unsigned arity;
36823 const char *bname;
36824 enum machine_mode el_mode, in_mode;
36825 int n, in_n;
36827 /* The ACML is 64-bit only and suitable for unsafe math only, as
36828 it does not correctly support parts of IEEE with the required
36829 precision, such as denormals. */
36830 if (!TARGET_64BIT
36831 || !flag_unsafe_math_optimizations)
36832 return NULL_TREE;
36834 el_mode = TYPE_MODE (TREE_TYPE (type_out));
36835 n = TYPE_VECTOR_SUBPARTS (type_out);
36836 in_mode = TYPE_MODE (TREE_TYPE (type_in));
36837 in_n = TYPE_VECTOR_SUBPARTS (type_in);
36838 if (el_mode != in_mode
36839 || n != in_n)
36840 return NULL_TREE;
36842 switch (fn)
36844 case BUILT_IN_SIN:
36845 case BUILT_IN_COS:
36846 case BUILT_IN_EXP:
36847 case BUILT_IN_LOG:
36848 case BUILT_IN_LOG2:
36849 case BUILT_IN_LOG10:
36850 name[4] = 'd';
36851 name[5] = '2';
36852 if (el_mode != DFmode
36853 || n != 2)
36854 return NULL_TREE;
36855 break;
36857 case BUILT_IN_SINF:
36858 case BUILT_IN_COSF:
36859 case BUILT_IN_EXPF:
36860 case BUILT_IN_POWF:
36861 case BUILT_IN_LOGF:
36862 case BUILT_IN_LOG2F:
36863 case BUILT_IN_LOG10F:
36864 name[4] = 's';
36865 name[5] = '4';
36866 if (el_mode != SFmode
36867 || n != 4)
36868 return NULL_TREE;
36869 break;
36871 default:
36872 return NULL_TREE;
36875 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
36876 sprintf (name + 7, "%s", bname+10);
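/* The resulting name is e.g. "__vrd2_sin" for __builtin_sin and
"__vrs4_sinf" for __builtin_sinf.  */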
36878 arity = 0;
36879 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
36880 args;
36881 args = TREE_CHAIN (args))
36882 arity++;
36884 if (arity == 1)
36885 fntype = build_function_type_list (type_out, type_in, NULL);
36886 else
36887 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
36889 /* Build a function declaration for the vectorized function. */
36890 new_fndecl = build_decl (BUILTINS_LOCATION,
36891 FUNCTION_DECL, get_identifier (name), fntype);
36892 TREE_PUBLIC (new_fndecl) = 1;
36893 DECL_EXTERNAL (new_fndecl) = 1;
36894 DECL_IS_NOVOPS (new_fndecl) = 1;
36895 TREE_READONLY (new_fndecl) = 1;
36897 return new_fndecl;
36900 /* Returns a decl of a function that implements gather load with
36901 memory type MEM_VECTYPE, index type INDEX_TYPE and scale SCALE.
36902 Return NULL_TREE if it is not available. */
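/* For example, a V2DFmode gather with SImode indices maps to
IX86_BUILTIN_GATHERSIV2DF and one with DImode indices to
IX86_BUILTIN_GATHERDIV2DF.  */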
36904 static tree
36905 ix86_vectorize_builtin_gather (const_tree mem_vectype,
36906 const_tree index_type, int scale)
36908 bool si;
36909 enum ix86_builtins code;
36911 if (! TARGET_AVX2)
36912 return NULL_TREE;
36914 if ((TREE_CODE (index_type) != INTEGER_TYPE
36915 && !POINTER_TYPE_P (index_type))
36916 || (TYPE_MODE (index_type) != SImode
36917 && TYPE_MODE (index_type) != DImode))
36918 return NULL_TREE;
36920 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
36921 return NULL_TREE;
36923 /* v*gather* insn sign extends index to pointer mode. */
36924 if (TYPE_PRECISION (index_type) < POINTER_SIZE
36925 && TYPE_UNSIGNED (index_type))
36926 return NULL_TREE;
36928 if (scale <= 0
36929 || scale > 8
36930 || (scale & (scale - 1)) != 0)
36931 return NULL_TREE;
36933 si = TYPE_MODE (index_type) == SImode;
36934 switch (TYPE_MODE (mem_vectype))
36936 case V2DFmode:
36937 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
36938 break;
36939 case V4DFmode:
36940 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
36941 break;
36942 case V2DImode:
36943 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
36944 break;
36945 case V4DImode:
36946 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
36947 break;
36948 case V4SFmode:
36949 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
36950 break;
36951 case V8SFmode:
36952 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
36953 break;
36954 case V4SImode:
36955 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
36956 break;
36957 case V8SImode:
36958 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
36959 break;
36960 case V8DFmode:
36961 if (TARGET_AVX512F)
36962 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF;
36963 else
36964 return NULL_TREE;
36965 break;
36966 case V8DImode:
36967 if (TARGET_AVX512F)
36968 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI;
36969 else
36970 return NULL_TREE;
36971 break;
36972 case V16SFmode:
36973 if (TARGET_AVX512F)
36974 code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF;
36975 else
36976 return NULL_TREE;
36977 break;
36978 case V16SImode:
36979 if (TARGET_AVX512F)
36980 code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI;
36981 else
36982 return NULL_TREE;
36983 break;
36984 default:
36985 return NULL_TREE;
36988 return ix86_get_builtin (code);
36991 /* Returns a decl for a target-specific builtin that implements the
36992 reciprocal of the function FN, or NULL_TREE if not available. */
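/* For example, the reciprocal of BUILT_IN_SQRTF is IX86_BUILTIN_RSQRTF,
and the reciprocal of the vectorized IX86_BUILTIN_SQRTPS_NR is
IX86_BUILTIN_RSQRTPS_NR.  */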
36994 static tree
36995 ix86_builtin_reciprocal (unsigned int fn, bool md_fn, bool)
36997 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
36998 && flag_finite_math_only && !flag_trapping_math
36999 && flag_unsafe_math_optimizations))
37000 return NULL_TREE;
37002 if (md_fn)
37003 /* Machine dependent builtins. */
37004 switch (fn)
37006 /* Vectorized version of sqrt to rsqrt conversion. */
37007 case IX86_BUILTIN_SQRTPS_NR:
37008 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
37010 case IX86_BUILTIN_SQRTPS_NR256:
37011 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
37013 default:
37014 return NULL_TREE;
37016 else
37017 /* Normal builtins. */
37018 switch (fn)
37020 /* Sqrt to rsqrt conversion. */
37021 case BUILT_IN_SQRTF:
37022 return ix86_get_builtin (IX86_BUILTIN_RSQRTF);
37024 default:
37025 return NULL_TREE;
37029 /* Helper for avx_vpermilps256_operand et al. This is also used by
37030 the expansion functions to turn the parallel back into a mask.
37031 The return value is 0 for no match and the imm8+1 for a match. */
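/* For example, a V4SFmode parallel selecting elements {1, 0, 3, 2}
encodes as imm8 0xb1, so the return value is 0xb2.  */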
37034 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
37036 unsigned i, nelt = GET_MODE_NUNITS (mode);
37037 unsigned mask = 0;
37038 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
37040 if (XVECLEN (par, 0) != (int) nelt)
37041 return 0;
37043 /* Validate that all of the elements are constants, and not totally
37044 out of range. Copy the data into an integral array to make the
37045 subsequent checks easier. */
37046 for (i = 0; i < nelt; ++i)
37048 rtx er = XVECEXP (par, 0, i);
37049 unsigned HOST_WIDE_INT ei;
37051 if (!CONST_INT_P (er))
37052 return 0;
37053 ei = INTVAL (er);
37054 if (ei >= nelt)
37055 return 0;
37056 ipar[i] = ei;
37059 switch (mode)
37061 case V8DFmode:
37062 /* In the 512-bit DFmode case, we can only move elements within
37063 a 128-bit lane. First fill the second part of the mask,
37064 then fallthru. */
37065 for (i = 4; i < 6; ++i)
37067 if (ipar[i] < 4 || ipar[i] >= 6)
37068 return 0;
37069 mask |= (ipar[i] - 4) << i;
37071 for (i = 6; i < 8; ++i)
37073 if (ipar[i] < 6)
37074 return 0;
37075 mask |= (ipar[i] - 6) << i;
37077 /* FALLTHRU */
37079 case V4DFmode:
37080 /* In the 256-bit DFmode case, we can only move elements within
37081 a 128-bit lane. */
37082 for (i = 0; i < 2; ++i)
37084 if (ipar[i] >= 2)
37085 return 0;
37086 mask |= ipar[i] << i;
37088 for (i = 2; i < 4; ++i)
37090 if (ipar[i] < 2)
37091 return 0;
37092 mask |= (ipar[i] - 2) << i;
37094 break;
37096 case V16SFmode:
37097 /* In the 512-bit SFmode case, the permutation in the upper 256 bits
37098 must mirror the permutation in the lower 256 bits. */
37099 for (i = 0; i < 8; ++i)
37100 if (ipar[i] + 8 != ipar[i + 8])
37101 return 0;
37102 /* FALLTHRU */
37104 case V8SFmode:
37105 /* In the 256-bit SFmode case, we have full freedom of
37106 movement within the low 128-bit lane, but the high 128-bit
37107 lane must mirror the exact same pattern. */
37108 for (i = 0; i < 4; ++i)
37109 if (ipar[i] + 4 != ipar[i + 4])
37110 return 0;
37111 nelt = 4;
37112 /* FALLTHRU */
37114 case V2DFmode:
37115 case V4SFmode:
37116 /* In the 128-bit case, we have full freedom in the placement of
37117 the elements from the source operand. */
37118 for (i = 0; i < nelt; ++i)
37119 mask |= ipar[i] << (i * (nelt / 2));
37120 break;
37122 default:
37123 gcc_unreachable ();
37126 /* Make sure success has a non-zero value by adding one. */
37127 return mask + 1;
37130 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
37131 the expansion functions to turn the parallel back into a mask.
37132 The return value is 0 for no match and the imm8+1 for a match. */
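/* For example, a V4DFmode parallel selecting elements {2, 3, 4, 5}
encodes as imm8 0x21, so the return value is 0x22.  */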
37135 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
37137 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
37138 unsigned mask = 0;
37139 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
37141 if (XVECLEN (par, 0) != (int) nelt)
37142 return 0;
37144 /* Validate that all of the elements are constants, and not totally
37145 out of range. Copy the data into an integral array to make the
37146 subsequent checks easier. */
37147 for (i = 0; i < nelt; ++i)
37149 rtx er = XVECEXP (par, 0, i);
37150 unsigned HOST_WIDE_INT ei;
37152 if (!CONST_INT_P (er))
37153 return 0;
37154 ei = INTVAL (er);
37155 if (ei >= 2 * nelt)
37156 return 0;
37157 ipar[i] = ei;
37160 /* Validate that each half of the permute selects consecutive elements. */
37161 for (i = 0; i < nelt2 - 1; ++i)
37162 if (ipar[i] + 1 != ipar[i + 1])
37163 return 0;
37164 for (i = nelt2; i < nelt - 1; ++i)
37165 if (ipar[i] + 1 != ipar[i + 1])
37166 return 0;
37168 /* Reconstruct the mask. */
37169 for (i = 0; i < 2; ++i)
37171 unsigned e = ipar[i * nelt2];
37172 if (e % nelt2)
37173 return 0;
37174 e /= nelt2;
37175 mask |= e << (i * 4);
37178 /* Make sure success has a non-zero value by adding one. */
37179 return mask + 1;
37182 /* Return a register priority for hard reg REGNO. */
37183 static int
37184 ix86_register_priority (int hard_regno)
37186 /* ebp and r13 as a base always want a displacement, and r12 as a
37187 base always wants an index.  So discourage their use in an
37188 address. */
37189 if (hard_regno == R12_REG || hard_regno == R13_REG)
37190 return 0;
37191 if (hard_regno == BP_REG)
37192 return 1;
37193 /* New x86-64 int registers result in bigger code size. Discourage
37194 them. */
37195 if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
37196 return 2;
37197 /* New x86-64 SSE registers result in bigger code size. Discourage
37198 them. */
37199 if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
37200 return 2;
37201 /* Usage of AX register results in smaller code. Prefer it. */
37202 if (hard_regno == 0)
37203 return 4;
37204 return 3;
37207 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
37209 Put float CONST_DOUBLE in the constant pool instead of fp regs.
37210 QImode must go into class Q_REGS.
37211 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
37212 movdf to do mem-to-mem moves through integer regs. */
37214 static reg_class_t
37215 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
37217 enum machine_mode mode = GET_MODE (x);
37219 /* We're only allowed to return a subclass of CLASS. Many of the
37220 following checks fail for NO_REGS, so eliminate that early. */
37221 if (regclass == NO_REGS)
37222 return NO_REGS;
37224 /* All classes can load zeros. */
37225 if (x == CONST0_RTX (mode))
37226 return regclass;
37228 /* Force constants into memory if we are loading a (nonzero) constant into
37229 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
37230 instructions to load from a constant. */
37231 if (CONSTANT_P (x)
37232 && (MAYBE_MMX_CLASS_P (regclass)
37233 || MAYBE_SSE_CLASS_P (regclass)
37234 || MAYBE_MASK_CLASS_P (regclass)))
37235 return NO_REGS;
37237 /* Prefer SSE regs only, if we can use them for math. */
37238 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
37239 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
37241 /* Floating-point constants need more complex checks. */
37242 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
37244 /* General regs can load everything. */
37245 if (reg_class_subset_p (regclass, GENERAL_REGS))
37246 return regclass;
37248 /* Floats can load 0 and 1 plus some others. Note that we eliminated
37249 zero above. We only want to wind up preferring 80387 registers if
37250 we plan on doing computation with them. */
37251 if (TARGET_80387
37252 && standard_80387_constant_p (x) > 0)
37254 /* Limit class to non-SSE. */
37255 if (regclass == FLOAT_SSE_REGS)
37256 return FLOAT_REGS;
37257 if (regclass == FP_TOP_SSE_REGS)
37258 return FP_TOP_REG;
37259 if (regclass == FP_SECOND_SSE_REGS)
37260 return FP_SECOND_REG;
37261 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
37262 return regclass;
37265 return NO_REGS;
37268 /* Generally when we see PLUS here, it's the function invariant
37269 (plus soft-fp const_int), which can only be computed into general
37270 regs. */
37271 if (GET_CODE (x) == PLUS)
37272 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
37274 /* QImode constants are easy to load, but non-constant QImode data
37275 must go into Q_REGS. */
37276 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
37278 if (reg_class_subset_p (regclass, Q_REGS))
37279 return regclass;
37280 if (reg_class_subset_p (Q_REGS, regclass))
37281 return Q_REGS;
37282 return NO_REGS;
37285 return regclass;
37288 /* Discourage putting floating-point values in SSE registers unless
37289 SSE math is being used, and likewise for the 387 registers. */
37290 static reg_class_t
37291 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
37293 enum machine_mode mode = GET_MODE (x);
37295 /* Restrict the output reload class to the register bank that we are doing
37296 math on. If we would like not to return a subset of CLASS, reject this
37297 alternative: if reload cannot do this, it will still use its choice. */
37298 mode = GET_MODE (x);
37299 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
37300 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
37302 if (X87_FLOAT_MODE_P (mode))
37304 if (regclass == FP_TOP_SSE_REGS)
37305 return FP_TOP_REG;
37306 else if (regclass == FP_SECOND_SSE_REGS)
37307 return FP_SECOND_REG;
37308 else
37309 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
37312 return regclass;
37315 static reg_class_t
37316 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
37317 enum machine_mode mode, secondary_reload_info *sri)
37319 /* Double-word spills from general registers to non-offsettable memory
37320 references (zero-extended addresses) require special handling. */
37321 if (TARGET_64BIT
37322 && MEM_P (x)
37323 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
37324 && INTEGER_CLASS_P (rclass)
37325 && !offsettable_memref_p (x))
37327 sri->icode = (in_p
37328 ? CODE_FOR_reload_noff_load
37329 : CODE_FOR_reload_noff_store);
37330 /* Add the cost of moving address to a temporary. */
37331 sri->extra_cost = 1;
37333 return NO_REGS;
37336 /* QImode spills from non-QI registers require an
37337 intermediate register on 32-bit targets. */
37338 if (mode == QImode
37339 && (MAYBE_MASK_CLASS_P (rclass)
37340 || (!TARGET_64BIT && !in_p
37341 && INTEGER_CLASS_P (rclass)
37342 && MAYBE_NON_Q_CLASS_P (rclass))))
37344 int regno;
37346 if (REG_P (x))
37347 regno = REGNO (x);
37348 else
37349 regno = -1;
37351 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
37352 regno = true_regnum (x);
37354 /* Return Q_REGS if the operand is in memory. */
37355 if (regno == -1)
37356 return Q_REGS;
37359 /* This condition handles the corner case where an expression involving
37360 pointers gets vectorized. We're trying to use the address of a
37361 stack slot as a vector initializer.
37363 (set (reg:V2DI 74 [ vect_cst_.2 ])
37364 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
37366 Eventually frame gets turned into sp+offset like this:
37368 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37369 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
37370 (const_int 392 [0x188]))))
37372 That later gets turned into:
37374 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37375 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
37376 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
37378 We'll have the following reload recorded:
37380 Reload 0: reload_in (DI) =
37381 (plus:DI (reg/f:DI 7 sp)
37382 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
37383 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37384 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
37385 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
37386 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37387 reload_reg_rtx: (reg:V2DI 22 xmm1)
37389 Which isn't going to work since SSE instructions can't handle scalar
37390 additions.  Returning GENERAL_REGS forces the addition into an integer
37391 register, and reload can handle subsequent reloads without problems. */
37393 if (in_p && GET_CODE (x) == PLUS
37394 && SSE_CLASS_P (rclass)
37395 && SCALAR_INT_MODE_P (mode))
37396 return GENERAL_REGS;
37398 return NO_REGS;
37401 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
37403 static bool
37404 ix86_class_likely_spilled_p (reg_class_t rclass)
37406 switch (rclass)
37408 case AREG:
37409 case DREG:
37410 case CREG:
37411 case BREG:
37412 case AD_REGS:
37413 case SIREG:
37414 case DIREG:
37415 case SSE_FIRST_REG:
37416 case FP_TOP_REG:
37417 case FP_SECOND_REG:
37418 return true;
37420 default:
37421 break;
37424 return false;
37427 /* If we are copying between general and FP registers, we need a memory
37428 location. The same is true for SSE and MMX registers.
37430 To optimize register_move_cost performance, allow inline variant.
37432 The macro can't work reliably when one of the CLASSES is a class containing
37433 registers from multiple units (SSE, MMX, integer). We avoid this by never
37434 combining those units in single alternative in the machine description.
37435 Ensure that this constraint holds to avoid unexpected surprises.
37437 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
37438 enforce these sanity checks. */
37440 static inline bool
37441 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
37442 enum machine_mode mode, int strict)
37444 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
37445 return false;
37446 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
37447 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
37448 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
37449 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
37450 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
37451 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
37453 gcc_assert (!strict || lra_in_progress);
37454 return true;
37457 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
37458 return true;
37460 /* Between mask and general, we have moves no larger than word size. */
37461 if ((MAYBE_MASK_CLASS_P (class1) != MAYBE_MASK_CLASS_P (class2))
37462 && (GET_MODE_SIZE (mode) > UNITS_PER_WORD))
37463 return true;
37465 /* ??? This is a lie. We do have moves between mmx/general, and for
37466 mmx/sse2. But by saying we need secondary memory we discourage the
37467 register allocator from using the mmx registers unless needed. */
37468 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
37469 return true;
37471 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
37473 /* SSE1 doesn't have any direct moves from other classes. */
37474 if (!TARGET_SSE2)
37475 return true;
37477 /* If the target says that inter-unit moves are more expensive
37478 than moving through memory, then don't generate them. */
37479 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
37480 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
37481 return true;
37483 /* Between SSE and general, we have moves no larger than word size. */
37484 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
37485 return true;
37488 return false;
37491 bool
37492 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
37493 enum machine_mode mode, int strict)
37495 return inline_secondary_memory_needed (class1, class2, mode, strict);
37498 /* Implement the TARGET_CLASS_MAX_NREGS hook.
37500 On the 80386, this is the size of MODE in words,
37501 except in the FP regs, where a single reg is always enough. */
37503 static unsigned char
37504 ix86_class_max_nregs (reg_class_t rclass, enum machine_mode mode)
37506 if (MAYBE_INTEGER_CLASS_P (rclass))
37508 if (mode == XFmode)
37509 return (TARGET_64BIT ? 2 : 3);
37510 else if (mode == XCmode)
37511 return (TARGET_64BIT ? 4 : 6);
37512 else
37513 return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
37515 else
37517 if (COMPLEX_MODE_P (mode))
37518 return 2;
37519 else
37520 return 1;
37524 /* Return true if the registers in CLASS cannot represent the change from
37525 modes FROM to TO. */
37527 bool
37528 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
37529 enum reg_class regclass)
37531 if (from == to)
37532 return false;
37534 /* x87 registers can't do subreg at all, as all values are reformatted
37535 to extended precision. */
37536 if (MAYBE_FLOAT_CLASS_P (regclass))
37537 return true;
37539 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
37541 /* Vector registers do not support QI or HImode loads. If we don't
37542 disallow a change to these modes, reload will assume it's ok to
37543 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
37544 the vec_dupv4hi pattern. */
37545 if (GET_MODE_SIZE (from) < 4)
37546 return true;
37548 /* Vector registers do not support subreg with nonzero offsets, which
37549 are otherwise valid for integer registers. Since we can't see
37550 whether we have a nonzero offset from here, prohibit all
37551 nonparadoxical subregs changing size. */
37552 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
37553 return true;
37556 return false;
37559 /* Return the cost of moving data of mode M between a
37560 register and memory. A value of 2 is the default; this cost is
37561 relative to those in `REGISTER_MOVE_COST'.
37563 This function is used extensively by register_move_cost, which is used to
37564 build tables at startup.  Make it inline in this case.
37565 When IN is 2, return the maximum of the in and out move costs.
37567 If moving between registers and memory is more expensive than
37568 between two registers, you should define this macro to express the
37569 relative cost.
37571 Also model the increased cost of moving QImode registers in non-Q_REGS
37572 classes. */
37574 static inline int
37575 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
37576 int in)
37578 int cost;
37579 if (FLOAT_CLASS_P (regclass))
37581 int index;
37582 switch (mode)
37584 case SFmode:
37585 index = 0;
37586 break;
37587 case DFmode:
37588 index = 1;
37589 break;
37590 case XFmode:
37591 index = 2;
37592 break;
37593 default:
37594 return 100;
37596 if (in == 2)
37597 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
37598 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
37600 if (SSE_CLASS_P (regclass))
37602 int index;
37603 switch (GET_MODE_SIZE (mode))
37605 case 4:
37606 index = 0;
37607 break;
37608 case 8:
37609 index = 1;
37610 break;
37611 case 16:
37612 index = 2;
37613 break;
37614 default:
37615 return 100;
37617 if (in == 2)
37618 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
37619 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
37621 if (MMX_CLASS_P (regclass))
37623 int index;
37624 switch (GET_MODE_SIZE (mode))
37626 case 4:
37627 index = 0;
37628 break;
37629 case 8:
37630 index = 1;
37631 break;
37632 default:
37633 return 100;
37635 if (in == 2)
37636 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
37637 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
37639 switch (GET_MODE_SIZE (mode))
37641 case 1:
37642 if (Q_CLASS_P (regclass) || TARGET_64BIT)
37644 if (!in)
37645 return ix86_cost->int_store[0];
37646 if (TARGET_PARTIAL_REG_DEPENDENCY
37647 && optimize_function_for_speed_p (cfun))
37648 cost = ix86_cost->movzbl_load;
37649 else
37650 cost = ix86_cost->int_load[0];
37651 if (in == 2)
37652 return MAX (cost, ix86_cost->int_store[0]);
37653 return cost;
37655 else
37657 if (in == 2)
37658 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
37659 if (in)
37660 return ix86_cost->movzbl_load;
37661 else
37662 return ix86_cost->int_store[0] + 4;
37664 break;
37665 case 2:
37666 if (in == 2)
37667 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
37668 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
37669 default:
37670 /* Compute the number of 32-bit moves needed.  TFmode is moved as XFmode. */
37671 if (mode == TFmode)
37672 mode = XFmode;
37673 if (in == 2)
37674 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
37675 else if (in)
37676 cost = ix86_cost->int_load[2];
37677 else
37678 cost = ix86_cost->int_store[2];
37679 return (cost * (((int) GET_MODE_SIZE (mode)
37680 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
37684 static int
37685 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
37686 bool in)
37688 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
37692 /* Return the cost of moving data from a register in class CLASS1 to
37693 one in class CLASS2.
37695 It is not required that the cost always equal 2 when FROM is the same as TO;
37696 on some machines it is expensive to move between registers if they are not
37697 general registers. */
37699 static int
37700 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
37701 reg_class_t class2_i)
37703 enum reg_class class1 = (enum reg_class) class1_i;
37704 enum reg_class class2 = (enum reg_class) class2_i;
37706 /* In case we require secondary memory, compute cost of the store followed
37707 by load. In order to avoid bad register allocation choices, we need
37708 this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
37710 if (inline_secondary_memory_needed (class1, class2, mode, 0))
37712 int cost = 1;
37714 cost += inline_memory_move_cost (mode, class1, 2);
37715 cost += inline_memory_move_cost (mode, class2, 2);
37717 /* In case of copying from a general purpose register we may emit multiple
37718 stores followed by a single load, causing a memory size mismatch stall.
37719 Count this as an arbitrarily high cost of 20. */
37720 if (targetm.class_max_nregs (class1, mode)
37721 > targetm.class_max_nregs (class2, mode))
37722 cost += 20;
37724 /* In the case of FP/MMX moves, the registers actually overlap, and we
37725 have to switch modes in order to treat them differently. */
37726 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
37727 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
37728 cost += 20;
37730 return cost;
37733 /* Moves between SSE/MMX and integer unit are expensive. */
37734 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
37735 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
37737 /* ??? By keeping the returned value relatively high, we limit the number
37738 of moves between integer and MMX/SSE registers for all targets.
37739 Additionally, a high value prevents a problem with x86_modes_tieable_p(),
37740 where integer modes in MMX/SSE registers are not tieable
37741 because of missing QImode and HImode moves to, from or between
37742 MMX/SSE registers. */
37743 return MAX (8, ix86_cost->mmxsse_to_integer);
37745 if (MAYBE_FLOAT_CLASS_P (class1))
37746 return ix86_cost->fp_move;
37747 if (MAYBE_SSE_CLASS_P (class1))
37748 return ix86_cost->sse_move;
37749 if (MAYBE_MMX_CLASS_P (class1))
37750 return ix86_cost->mmx_move;
37751 return 2;
37754 /* Return TRUE if hard register REGNO can hold a value of machine-mode
37755 MODE. */
37757 bool
37758 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
37760 /* Flags can hold only CCmode values, and only flags can hold them. */
37761 if (CC_REGNO_P (regno))
37762 return GET_MODE_CLASS (mode) == MODE_CC;
37763 if (GET_MODE_CLASS (mode) == MODE_CC
37764 || GET_MODE_CLASS (mode) == MODE_RANDOM
37765 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
37766 return false;
37767 if (STACK_REGNO_P (regno))
37768 return VALID_FP_MODE_P (mode);
37769 if (MASK_REGNO_P (regno))
37770 return (VALID_MASK_REG_MODE (mode)
37771 || (TARGET_AVX512BW && VALID_MASK_AVX512BW_MODE (mode)));
37772 if (SSE_REGNO_P (regno))
37774 /* We implement the move patterns for all vector modes into and
37775 out of SSE registers, even when no operation instructions
37776 are available. */
37778 /* For AVX-512 we allow, regardless of regno:
37779 - XI mode
37780 - any 512-bit wide vector mode
37781 - any scalar mode. */
37782 if (TARGET_AVX512F
37783 && (mode == XImode
37784 || VALID_AVX512F_REG_MODE (mode)
37785 || VALID_AVX512F_SCALAR_MODE (mode)))
37786 return true;
37788 /* TODO check for QI/HI scalars. */
37789 /* With AVX512VL, xmm16-xmm31 may also hold 128-bit and 256-bit modes. */
37790 if (TARGET_AVX512VL
37791 && (mode == OImode
37792 || mode == TImode
37793 || VALID_AVX256_REG_MODE (mode)
37794 || VALID_AVX512VL_128_REG_MODE (mode)))
37795 return true;
37797 /* xmm16-xmm31 are only available for AVX-512. */
37798 if (EXT_REX_SSE_REGNO_P (regno))
37799 return false;
37801 /* OImode and AVX modes are available only when AVX is enabled. */
37802 return ((TARGET_AVX
37803 && VALID_AVX256_REG_OR_OI_MODE (mode))
37804 || VALID_SSE_REG_MODE (mode)
37805 || VALID_SSE2_REG_MODE (mode)
37806 || VALID_MMX_REG_MODE (mode)
37807 || VALID_MMX_REG_MODE_3DNOW (mode));
37809 if (MMX_REGNO_P (regno))
37811 /* We implement the move patterns for 3DNOW modes even in MMX mode,
37812 so if the register is available at all, then we can move data of
37813 the given mode into or out of it. */
37814 return (VALID_MMX_REG_MODE (mode)
37815 || VALID_MMX_REG_MODE_3DNOW (mode));
37818 if (mode == QImode)
37820 /* Take care with QImode values - they can live in non-QI regs,
37821 but then they cause partial-register stalls. */
37822 if (ANY_QI_REGNO_P (regno))
37823 return true;
37824 if (!TARGET_PARTIAL_REG_STALL)
37825 return true;
37826 /* LRA checks if the hard register is OK for the given mode.
37827 QImode values can live in non-QI regs, so we allow all
37828 registers here. */
37829 if (lra_in_progress)
37830 return true;
37831 return !can_create_pseudo_p ();
37833 /* We handle both integer and floats in the general purpose registers. */
37834 else if (VALID_INT_MODE_P (mode))
37835 return true;
37836 else if (VALID_FP_MODE_P (mode))
37837 return true;
37838 else if (VALID_DFP_MODE_P (mode))
37839 return true;
37840 /* Lots of MMX code casts 8-byte vector modes to DImode. If we then go
37841 on to use that value in smaller contexts, this can easily force a
37842 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
37843 supporting DImode, allow it. */
37844 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
37845 return true;
37847 return false;
37850 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
37851 tieable integer mode. */
37853 static bool
37854 ix86_tieable_integer_mode_p (enum machine_mode mode)
37856 switch (mode)
37858 case HImode:
37859 case SImode:
37860 return true;
37862 case QImode:
37863 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
37865 case DImode:
37866 return TARGET_64BIT;
37868 default:
37869 return false;
37873 /* Return true if MODE1 is accessible in a register that can hold MODE2
37874 without copying. That is, all register classes that can hold MODE2
37875 can also hold MODE1. */
37877 bool
37878 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
37880 if (mode1 == mode2)
37881 return true;
37883 if (ix86_tieable_integer_mode_p (mode1)
37884 && ix86_tieable_integer_mode_p (mode2))
37885 return true;
37887 /* MODE2 being XFmode implies fp stack or general regs, which means we
37888 can tie any smaller floating point modes to it. Note that we do not
37889 tie this with TFmode. */
37890 if (mode2 == XFmode)
37891 return mode1 == SFmode || mode1 == DFmode;
37893 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
37894 that we can tie it with SFmode. */
37895 if (mode2 == DFmode)
37896 return mode1 == SFmode;
37898 /* If MODE2 is only appropriate for an SSE register, then tie with
37899 any other mode acceptable to SSE registers. */
37900 if (GET_MODE_SIZE (mode2) == 32
37901 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
37902 return (GET_MODE_SIZE (mode1) == 32
37903 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
37904 if (GET_MODE_SIZE (mode2) == 16
37905 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
37906 return (GET_MODE_SIZE (mode1) == 16
37907 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
37909 /* If MODE2 is appropriate for an MMX register, then tie
37910 with any other mode acceptable to MMX registers. */
37911 if (GET_MODE_SIZE (mode2) == 8
37912 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
37913 return (GET_MODE_SIZE (mode1) == 8
37914 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
37916 return false;
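/* Two worked cases of the rules above (illustrative only):
   ix86_modes_tieable_p (SFmode, DFmode) is true - every class that can hold
   DFmode can also hold SFmode;  the reverse query,
   ix86_modes_tieable_p (DFmode, SFmode), is false, since SFmode matches none
   of the special cases handled above.  Likewise HImode and SImode always
   tie, whereas QImode ties with them only for TARGET_64BIT or when
   partial-register stalls are not a concern.  */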
37919 /* Return the cost of moving between two registers of mode MODE. */
37921 static int
37922 ix86_set_reg_reg_cost (enum machine_mode mode)
37924 unsigned int units = UNITS_PER_WORD;
37926 switch (GET_MODE_CLASS (mode))
37928 default:
37929 break;
37931 case MODE_CC:
37932 units = GET_MODE_SIZE (CCmode);
37933 break;
37935 case MODE_FLOAT:
37936 if ((TARGET_SSE && mode == TFmode)
37937 || (TARGET_80387 && mode == XFmode)
37938 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
37939 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
37940 units = GET_MODE_SIZE (mode);
37941 break;
37943 case MODE_COMPLEX_FLOAT:
37944 if ((TARGET_SSE && mode == TCmode)
37945 || (TARGET_80387 && mode == XCmode)
37946 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
37947 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
37948 units = GET_MODE_SIZE (mode);
37949 break;
37951 case MODE_VECTOR_INT:
37952 case MODE_VECTOR_FLOAT:
37953 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
37954 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
37955 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
37956 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
37957 || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
37958 units = GET_MODE_SIZE (mode);
37961 /* Return the cost of moving between two registers of mode MODE,
37962 assuming that the move will be in pieces of at most UNITS bytes. */
37963 return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
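/* Two worked cases of the formula above (illustrative): on a 32-bit target
   a DImode set keeps units = UNITS_PER_WORD = 4, so the 8 bytes move in two
   pieces and the cost is COSTS_N_INSNS (2); a 16-byte SSE vector mode that
   matches one of the cases above moves in a single piece, COSTS_N_INSNS (1).  */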
37966 /* Compute a (partial) cost for rtx X. Return true if the complete
37967 cost has been computed, and false if subexpressions should be
37968 scanned. In either case, *TOTAL contains the cost result. */
37970 static bool
37971 ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total,
37972 bool speed)
37974 rtx mask;
37975 enum rtx_code code = (enum rtx_code) code_i;
37976 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
37977 enum machine_mode mode = GET_MODE (x);
37978 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
37980 switch (code)
37982 case SET:
37983 if (register_operand (SET_DEST (x), VOIDmode)
37984 && reg_or_0_operand (SET_SRC (x), VOIDmode))
37986 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
37987 return true;
37989 return false;
37991 case CONST_INT:
37992 case CONST:
37993 case LABEL_REF:
37994 case SYMBOL_REF:
37995 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
37996 *total = 3;
37997 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
37998 *total = 2;
37999 else if (flag_pic && SYMBOLIC_CONST (x)
38000 && !(TARGET_64BIT
38001 && (GET_CODE (x) == LABEL_REF
38002 || (GET_CODE (x) == SYMBOL_REF
38003 && SYMBOL_REF_LOCAL_P (x)))))
38004 *total = 1;
38005 else
38006 *total = 0;
38007 return true;
38009 case CONST_DOUBLE:
38010 if (mode == VOIDmode)
38012 *total = 0;
38013 return true;
38015 switch (standard_80387_constant_p (x))
38017 case 1: /* 0.0 */
38018 *total = 1;
38019 return true;
38020 default: /* Other constants */
38021 *total = 2;
38022 return true;
38023 case 0:
38024 case -1:
38025 break;
38027 if (SSE_FLOAT_MODE_P (mode))
38029 case CONST_VECTOR:
38030 switch (standard_sse_constant_p (x))
38032 case 0:
38033 break;
38034 case 1: /* 0: xor eliminates false dependency */
38035 *total = 0;
38036 return true;
38037 default: /* -1: cmp contains false dependency */
38038 *total = 1;
38039 return true;
38042 /* Fall back to (MEM (SYMBOL_REF)), since that's where
38043 it'll probably end up. Add a penalty for size. */
38044 *total = (COSTS_N_INSNS (1)
38045 + (flag_pic != 0 && !TARGET_64BIT)
38046 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
38047 return true;
38049 case ZERO_EXTEND:
38050 /* Zero extension is often completely free on x86_64, so make
38051 it as cheap as possible. */
38052 if (TARGET_64BIT && mode == DImode
38053 && GET_MODE (XEXP (x, 0)) == SImode)
38054 *total = 1;
38055 else if (TARGET_ZERO_EXTEND_WITH_AND)
38056 *total = cost->add;
38057 else
38058 *total = cost->movzx;
38059 return false;
38061 case SIGN_EXTEND:
38062 *total = cost->movsx;
38063 return false;
38065 case ASHIFT:
38066 if (SCALAR_INT_MODE_P (mode)
38067 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
38068 && CONST_INT_P (XEXP (x, 1)))
38070 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
38071 if (value == 1)
38073 *total = cost->add;
38074 return false;
38076 if ((value == 2 || value == 3)
38077 && cost->lea <= cost->shift_const)
38079 *total = cost->lea;
38080 return false;
38083 /* FALLTHRU */
38085 case ROTATE:
38086 case ASHIFTRT:
38087 case LSHIFTRT:
38088 case ROTATERT:
38089 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
38091 /* ??? Should be SSE vector operation cost. */
38092 /* At least for published AMD latencies, this really is the same
38093 as the latency for a simple fpu operation like fabs. */
38094 /* V*QImode is emulated with 1-11 insns. */
38095 if (mode == V16QImode || mode == V32QImode)
38097 int count = 11;
38098 if (TARGET_XOP && mode == V16QImode)
38100 /* For XOP we use vpshab, which requires a broadcast of the
38101 value to the variable shift insn. For constants this
38102 means a V16Q const in mem; even when we can perform the
38103 shift with one insn, set the cost so as to prefer paddb. */
38104 if (CONSTANT_P (XEXP (x, 1)))
38106 *total = (cost->fabs
38107 + rtx_cost (XEXP (x, 0), code, 0, speed)
38108 + (speed ? 2 : COSTS_N_BYTES (16)));
38109 return true;
38111 count = 3;
38113 else if (TARGET_SSSE3)
38114 count = 7;
38115 *total = cost->fabs * count;
38117 else
38118 *total = cost->fabs;
38120 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38122 if (CONST_INT_P (XEXP (x, 1)))
38124 if (INTVAL (XEXP (x, 1)) > 32)
38125 *total = cost->shift_const + COSTS_N_INSNS (2);
38126 else
38127 *total = cost->shift_const * 2;
38129 else
38131 if (GET_CODE (XEXP (x, 1)) == AND)
38132 *total = cost->shift_var * 2;
38133 else
38134 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
38137 else
38139 if (CONST_INT_P (XEXP (x, 1)))
38140 *total = cost->shift_const;
38141 else if (GET_CODE (XEXP (x, 1)) == SUBREG
38142 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
38144 /* Return the cost after shift-and truncation. */
38145 *total = cost->shift_var;
38146 return true;
38148 else
38149 *total = cost->shift_var;
38151 return false;
38153 case FMA:
38155 rtx sub;
38157 gcc_assert (FLOAT_MODE_P (mode));
38158 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
38160 /* ??? SSE scalar/vector cost should be used here. */
38161 /* ??? Bald assumption that fma has the same cost as fmul. */
38162 *total = cost->fmul;
38163 *total += rtx_cost (XEXP (x, 1), FMA, 1, speed);
38165 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
38166 sub = XEXP (x, 0);
38167 if (GET_CODE (sub) == NEG)
38168 sub = XEXP (sub, 0);
38169 *total += rtx_cost (sub, FMA, 0, speed);
38171 sub = XEXP (x, 2);
38172 if (GET_CODE (sub) == NEG)
38173 sub = XEXP (sub, 0);
38174 *total += rtx_cost (sub, FMA, 2, speed);
38175 return true;
38178 case MULT:
38179 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38181 /* ??? SSE scalar cost should be used here. */
38182 *total = cost->fmul;
38183 return false;
38185 else if (X87_FLOAT_MODE_P (mode))
38187 *total = cost->fmul;
38188 return false;
38190 else if (FLOAT_MODE_P (mode))
38192 /* ??? SSE vector cost should be used here. */
38193 *total = cost->fmul;
38194 return false;
38196 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
38198 /* V*QImode is emulated with 7-13 insns. */
38199 if (mode == V16QImode || mode == V32QImode)
38201 int extra = 11;
38202 if (TARGET_XOP && mode == V16QImode)
38203 extra = 5;
38204 else if (TARGET_SSSE3)
38205 extra = 6;
38206 *total = cost->fmul * 2 + cost->fabs * extra;
38208 /* V*DImode is emulated with 5-8 insns. */
38209 else if (mode == V2DImode || mode == V4DImode)
38211 if (TARGET_XOP && mode == V2DImode)
38212 *total = cost->fmul * 2 + cost->fabs * 3;
38213 else
38214 *total = cost->fmul * 3 + cost->fabs * 5;
38216 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
38217 insns, including two PMULUDQ. */
38218 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
38219 *total = cost->fmul * 2 + cost->fabs * 5;
38220 else
38221 *total = cost->fmul;
38222 return false;
38224 else
38226 rtx op0 = XEXP (x, 0);
38227 rtx op1 = XEXP (x, 1);
38228 int nbits;
38229 if (CONST_INT_P (XEXP (x, 1)))
38231 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
38232 for (nbits = 0; value != 0; value &= value - 1)
38233 nbits++;
38235 else
38236 /* This is arbitrary. */
38237 nbits = 7;
38239 /* Compute costs correctly for widening multiplication. */
38240 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
38241 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
38242 == GET_MODE_SIZE (mode))
38244 int is_mulwiden = 0;
38245 enum machine_mode inner_mode = GET_MODE (op0);
38247 if (GET_CODE (op0) == GET_CODE (op1))
38248 is_mulwiden = 1, op1 = XEXP (op1, 0);
38249 else if (CONST_INT_P (op1))
38251 if (GET_CODE (op0) == SIGN_EXTEND)
38252 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
38253 == INTVAL (op1);
38254 else
38255 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
38258 if (is_mulwiden)
38259 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
38262 *total = (cost->mult_init[MODE_INDEX (mode)]
38263 + nbits * cost->mult_bit
38264 + rtx_cost (op0, outer_code, opno, speed)
38265 + rtx_cost (op1, outer_code, opno, speed));
38267 return true;
38270 case DIV:
38271 case UDIV:
38272 case MOD:
38273 case UMOD:
38274 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38275 /* ??? SSE cost should be used here. */
38276 *total = cost->fdiv;
38277 else if (X87_FLOAT_MODE_P (mode))
38278 *total = cost->fdiv;
38279 else if (FLOAT_MODE_P (mode))
38280 /* ??? SSE vector cost should be used here. */
38281 *total = cost->fdiv;
38282 else
38283 *total = cost->divide[MODE_INDEX (mode)];
38284 return false;
38286 case PLUS:
38287 if (GET_MODE_CLASS (mode) == MODE_INT
38288 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
38290 if (GET_CODE (XEXP (x, 0)) == PLUS
38291 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
38292 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
38293 && CONSTANT_P (XEXP (x, 1)))
38295 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
38296 if (val == 2 || val == 4 || val == 8)
38298 *total = cost->lea;
38299 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
38300 outer_code, opno, speed);
38301 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
38302 outer_code, opno, speed);
38303 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38304 return true;
38307 else if (GET_CODE (XEXP (x, 0)) == MULT
38308 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
38310 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
38311 if (val == 2 || val == 4 || val == 8)
38313 *total = cost->lea;
38314 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
38315 outer_code, opno, speed);
38316 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38317 return true;
38320 else if (GET_CODE (XEXP (x, 0)) == PLUS)
38322 *total = cost->lea;
38323 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
38324 outer_code, opno, speed);
38325 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
38326 outer_code, opno, speed);
38327 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38328 return true;
38331 /* FALLTHRU */
38333 case MINUS:
38334 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38336 /* ??? SSE cost should be used here. */
38337 *total = cost->fadd;
38338 return false;
38340 else if (X87_FLOAT_MODE_P (mode))
38342 *total = cost->fadd;
38343 return false;
38345 else if (FLOAT_MODE_P (mode))
38347 /* ??? SSE vector cost should be used here. */
38348 *total = cost->fadd;
38349 return false;
38351 /* FALLTHRU */
38353 case AND:
38354 case IOR:
38355 case XOR:
38356 if (GET_MODE_CLASS (mode) == MODE_INT
38357 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38359 *total = (cost->add * 2
38360 + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
38361 << (GET_MODE (XEXP (x, 0)) != DImode))
38362 + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
38363 << (GET_MODE (XEXP (x, 1)) != DImode)));
38364 return true;
38366 /* FALLTHRU */
38368 case NEG:
38369 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38371 /* ??? SSE cost should be used here. */
38372 *total = cost->fchs;
38373 return false;
38375 else if (X87_FLOAT_MODE_P (mode))
38377 *total = cost->fchs;
38378 return false;
38380 else if (FLOAT_MODE_P (mode))
38382 /* ??? SSE vector cost should be used here. */
38383 *total = cost->fchs;
38384 return false;
38386 /* FALLTHRU */
38388 case NOT:
38389 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
38391 /* ??? Should be SSE vector operation cost. */
38392 /* At least for published AMD latencies, this really is the same
38393 as the latency for a simple fpu operation like fabs. */
38394 *total = cost->fabs;
38396 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38397 *total = cost->add * 2;
38398 else
38399 *total = cost->add;
38400 return false;
38402 case COMPARE:
38403 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
38404 && XEXP (XEXP (x, 0), 1) == const1_rtx
38405 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
38406 && XEXP (x, 1) == const0_rtx)
38408 /* This kind of construct is implemented using test[bwl].
38409 Treat it as if we had an AND. */
38410 *total = (cost->add
38411 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
38412 + rtx_cost (const1_rtx, outer_code, opno, speed));
38413 return true;
38415 return false;
38417 case FLOAT_EXTEND:
38418 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
38419 *total = 0;
38420 return false;
38422 case ABS:
38423 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38424 /* ??? SSE cost should be used here. */
38425 *total = cost->fabs;
38426 else if (X87_FLOAT_MODE_P (mode))
38427 *total = cost->fabs;
38428 else if (FLOAT_MODE_P (mode))
38429 /* ??? SSE vector cost should be used here. */
38430 *total = cost->fabs;
38431 return false;
38433 case SQRT:
38434 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38435 /* ??? SSE cost should be used here. */
38436 *total = cost->fsqrt;
38437 else if (X87_FLOAT_MODE_P (mode))
38438 *total = cost->fsqrt;
38439 else if (FLOAT_MODE_P (mode))
38440 /* ??? SSE vector cost should be used here. */
38441 *total = cost->fsqrt;
38442 return false;
38444 case UNSPEC:
38445 if (XINT (x, 1) == UNSPEC_TP)
38446 *total = 0;
38447 return false;
38449 case VEC_SELECT:
38450 case VEC_CONCAT:
38451 case VEC_DUPLICATE:
38452 /* ??? Assume all of these vector manipulation patterns are
38453 recognizable. In which case they all pretty much have the
38454 same cost. */
38455 *total = cost->fabs;
38456 return true;
38457 case VEC_MERGE:
38458 mask = XEXP (x, 2);
38459 /* This is a masked instruction; assume the same cost
38460 as the non-masked variant. */
38461 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
38462 *total = rtx_cost (XEXP (x, 0), outer_code, opno, speed);
38463 else
38464 *total = cost->fabs;
38465 return true;
38467 default:
38468 return false;
38472 #if TARGET_MACHO
38474 static int current_machopic_label_num;
38476 /* Given a symbol name and its associated stub, write out the
38477 definition of the stub. */
38479 void
38480 machopic_output_stub (FILE *file, const char *symb, const char *stub)
38482 unsigned int length;
38483 char *binder_name, *symbol_name, lazy_ptr_name[32];
38484 int label = ++current_machopic_label_num;
38486 /* For 64-bit we shouldn't get here. */
38487 gcc_assert (!TARGET_64BIT);
38489 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
38490 symb = targetm.strip_name_encoding (symb);
38492 length = strlen (stub);
38493 binder_name = XALLOCAVEC (char, length + 32);
38494 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
38496 length = strlen (symb);
38497 symbol_name = XALLOCAVEC (char, length + 32);
38498 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
38500 sprintf (lazy_ptr_name, "L%d$lz", label);
38502 if (MACHOPIC_ATT_STUB)
38503 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
38504 else if (MACHOPIC_PURE)
38505 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
38506 else
38507 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
38509 fprintf (file, "%s:\n", stub);
38510 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
38512 if (MACHOPIC_ATT_STUB)
38514 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
38516 else if (MACHOPIC_PURE)
38518 /* PIC stub. */
38519 /* 25-byte PIC stub using "CALL get_pc_thunk". */
38520 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
38521 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
38522 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
38523 label, lazy_ptr_name, label);
38524 fprintf (file, "\tjmp\t*%%ecx\n");
38526 else
38527 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
38529 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
38530 it needs no stub-binding-helper. */
38531 if (MACHOPIC_ATT_STUB)
38532 return;
38534 fprintf (file, "%s:\n", binder_name);
38536 if (MACHOPIC_PURE)
38538 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
38539 fprintf (file, "\tpushl\t%%ecx\n");
38541 else
38542 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
38544 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
38546 /* N.B. Keep the correspondence of these
38547 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
38548 old-pic/new-pic/non-pic stubs; altering this will break
38549 compatibility with existing dylibs. */
38550 if (MACHOPIC_PURE)
38552 /* 25-byte PIC stub using "CALL get_pc_thunk". */
38553 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
38555 else
38556 /* 16-byte -mdynamic-no-pic stub. */
38557 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
38559 fprintf (file, "%s:\n", lazy_ptr_name);
38560 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
38561 fprintf (file, ASM_LONG "%s\n", binder_name);
38563 #endif /* TARGET_MACHO */
38565 /* Order the registers for the register allocator. */
38567 void
38568 x86_order_regs_for_local_alloc (void)
38570 int pos = 0;
38571 int i;
38573 /* First allocate the local general purpose registers. */
38574 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
38575 if (GENERAL_REGNO_P (i) && call_used_regs[i])
38576 reg_alloc_order [pos++] = i;
38578 /* Global general purpose registers. */
38579 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
38580 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
38581 reg_alloc_order [pos++] = i;
38583 /* x87 registers come first in case we are doing FP math
38584 using them. */
38585 if (!TARGET_SSE_MATH)
38586 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
38587 reg_alloc_order [pos++] = i;
38589 /* SSE registers. */
38590 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
38591 reg_alloc_order [pos++] = i;
38592 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
38593 reg_alloc_order [pos++] = i;
38595 /* Extended REX SSE registers. */
38596 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
38597 reg_alloc_order [pos++] = i;
38599 /* Mask registers. */
38600 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
38601 reg_alloc_order [pos++] = i;
38603 /* x87 registers. */
38604 if (TARGET_SSE_MATH)
38605 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
38606 reg_alloc_order [pos++] = i;
38608 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
38609 reg_alloc_order [pos++] = i;
38611 /* Initialize the rest of the array, as we do not allocate some registers
38612 at all. */
38613 while (pos < FIRST_PSEUDO_REGISTER)
38614 reg_alloc_order [pos++] = 0;
38617 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
38618 in struct attribute_spec handler. */
38619 static tree
38620 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
38621 tree args,
38622 int,
38623 bool *no_add_attrs)
38625 if (TREE_CODE (*node) != FUNCTION_TYPE
38626 && TREE_CODE (*node) != METHOD_TYPE
38627 && TREE_CODE (*node) != FIELD_DECL
38628 && TREE_CODE (*node) != TYPE_DECL)
38630 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38631 name);
38632 *no_add_attrs = true;
38633 return NULL_TREE;
38635 if (TARGET_64BIT)
38637 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
38638 name);
38639 *no_add_attrs = true;
38640 return NULL_TREE;
38642 if (is_attribute_p ("callee_pop_aggregate_return", name))
38644 tree cst;
38646 cst = TREE_VALUE (args);
38647 if (TREE_CODE (cst) != INTEGER_CST)
38649 warning (OPT_Wattributes,
38650 "%qE attribute requires an integer constant argument",
38651 name);
38652 *no_add_attrs = true;
38654 else if (compare_tree_int (cst, 0) != 0
38655 && compare_tree_int (cst, 1) != 0)
38657 warning (OPT_Wattributes,
38658 "argument to %qE attribute is neither zero, nor one",
38659 name);
38660 *no_add_attrs = true;
38663 return NULL_TREE;
38666 return NULL_TREE;
38669 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
38670 struct attribute_spec.handler. */
38671 static tree
38672 ix86_handle_abi_attribute (tree *node, tree name, tree, int,
38673 bool *no_add_attrs)
38675 if (TREE_CODE (*node) != FUNCTION_TYPE
38676 && TREE_CODE (*node) != METHOD_TYPE
38677 && TREE_CODE (*node) != FIELD_DECL
38678 && TREE_CODE (*node) != TYPE_DECL)
38680 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38681 name);
38682 *no_add_attrs = true;
38683 return NULL_TREE;
38686 /* Can combine regparm with all attributes but fastcall. */
38687 if (is_attribute_p ("ms_abi", name))
38689 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
38691 error ("ms_abi and sysv_abi attributes are not compatible");
38694 return NULL_TREE;
38696 else if (is_attribute_p ("sysv_abi", name))
38698 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
38700 error ("ms_abi and sysv_abi attributes are not compatible");
38703 return NULL_TREE;
38706 return NULL_TREE;
38709 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
38710 struct attribute_spec.handler. */
38711 static tree
38712 ix86_handle_struct_attribute (tree *node, tree name, tree, int,
38713 bool *no_add_attrs)
38715 tree *type = NULL;
38716 if (DECL_P (*node))
38718 if (TREE_CODE (*node) == TYPE_DECL)
38719 type = &TREE_TYPE (*node);
38721 else
38722 type = node;
38724 if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
38726 warning (OPT_Wattributes, "%qE attribute ignored",
38727 name);
38728 *no_add_attrs = true;
38731 else if ((is_attribute_p ("ms_struct", name)
38732 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
38733 || ((is_attribute_p ("gcc_struct", name)
38734 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
38736 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
38737 name);
38738 *no_add_attrs = true;
38741 return NULL_TREE;
38744 static tree
38745 ix86_handle_fndecl_attribute (tree *node, tree name, tree, int,
38746 bool *no_add_attrs)
38748 if (TREE_CODE (*node) != FUNCTION_DECL)
38750 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38751 name);
38752 *no_add_attrs = true;
38754 return NULL_TREE;
38757 static bool
38758 ix86_ms_bitfield_layout_p (const_tree record_type)
38760 return ((TARGET_MS_BITFIELD_LAYOUT
38761 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
38762 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
38765 /* Returns an expression indicating where the this parameter is
38766 located on entry to the FUNCTION. */
38768 static rtx
38769 x86_this_parameter (tree function)
38771 tree type = TREE_TYPE (function);
38772 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
38773 int nregs;
38775 if (TARGET_64BIT)
38777 const int *parm_regs;
38779 if (ix86_function_type_abi (type) == MS_ABI)
38780 parm_regs = x86_64_ms_abi_int_parameter_registers;
38781 else
38782 parm_regs = x86_64_int_parameter_registers;
38783 return gen_rtx_REG (Pmode, parm_regs[aggr]);
38786 nregs = ix86_function_regparm (type, function);
38788 if (nregs > 0 && !stdarg_p (type))
38790 int regno;
38791 unsigned int ccvt = ix86_get_callcvt (type);
38793 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
38794 regno = aggr ? DX_REG : CX_REG;
38795 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
38797 regno = CX_REG;
38798 if (aggr)
38799 return gen_rtx_MEM (SImode,
38800 plus_constant (Pmode, stack_pointer_rtx, 4));
38802 else
38804 regno = AX_REG;
38805 if (aggr)
38807 regno = DX_REG;
38808 if (nregs == 1)
38809 return gen_rtx_MEM (SImode,
38810 plus_constant (Pmode,
38811 stack_pointer_rtx, 4));
38814 return gen_rtx_REG (SImode, regno);
38817 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
38818 aggr ? 8 : 4));
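/* Some concrete outcomes of the lookup above (illustrative, assuming the
   usual calling conventions):
     64-bit SysV ABI:    this in %rdi, or %rsi when a hidden aggregate-return
                         pointer takes the first slot;
     64-bit MS ABI:      this in %rcx, or %rdx for aggregate returns;
     32-bit fastcall:    this in %ecx, or %edx for aggregate returns;
     32-bit thiscall:    this in %ecx, or at 4(%esp) for aggregate returns;
     32-bit, no regparm: this at 4(%esp), or 8(%esp) for aggregate returns.  */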
38821 /* Determine whether x86_output_mi_thunk can succeed. */
38823 static bool
38824 x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
38825 const_tree function)
38827 /* 64-bit can handle anything. */
38828 if (TARGET_64BIT)
38829 return true;
38831 /* For 32-bit, everything's fine if we have one free register. */
38832 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
38833 return true;
38835 /* Need a free register for vcall_offset. */
38836 if (vcall_offset)
38837 return false;
38839 /* Need a free register for GOT references. */
38840 if (flag_pic && !targetm.binds_local_p (function))
38841 return false;
38843 /* Otherwise ok. */
38844 return true;
38847 /* Output the assembler code for a thunk function. THUNK_DECL is the
38848 declaration for the thunk function itself, FUNCTION is the decl for
38849 the target function. DELTA is an immediate constant offset to be
38850 added to THIS. If VCALL_OFFSET is nonzero, the word at
38851 *(*this + vcall_offset) should be added to THIS. */
38853 static void
38854 x86_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
38855 HOST_WIDE_INT vcall_offset, tree function)
38857 rtx this_param = x86_this_parameter (function);
38858 rtx this_reg, tmp, fnaddr;
38859 unsigned int tmp_regno;
38860 rtx_insn *insn;
38862 if (TARGET_64BIT)
38863 tmp_regno = R10_REG;
38864 else
38866 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
38867 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
38868 tmp_regno = AX_REG;
38869 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
38870 tmp_regno = DX_REG;
38871 else
38872 tmp_regno = CX_REG;
38875 emit_note (NOTE_INSN_PROLOGUE_END);
38877 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
38878 pull it in now and let DELTA benefit. */
38879 if (REG_P (this_param))
38880 this_reg = this_param;
38881 else if (vcall_offset)
38883 /* Put the this parameter into %eax. */
38884 this_reg = gen_rtx_REG (Pmode, AX_REG);
38885 emit_move_insn (this_reg, this_param);
38887 else
38888 this_reg = NULL_RTX;
38890 /* Adjust the this parameter by a fixed constant. */
38891 if (delta)
38893 rtx delta_rtx = GEN_INT (delta);
38894 rtx delta_dst = this_reg ? this_reg : this_param;
38896 if (TARGET_64BIT)
38898 if (!x86_64_general_operand (delta_rtx, Pmode))
38900 tmp = gen_rtx_REG (Pmode, tmp_regno);
38901 emit_move_insn (tmp, delta_rtx);
38902 delta_rtx = tmp;
38906 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
38909 /* Adjust the this parameter by a value stored in the vtable. */
38910 if (vcall_offset)
38912 rtx vcall_addr, vcall_mem, this_mem;
38914 tmp = gen_rtx_REG (Pmode, tmp_regno);
38916 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
38917 if (Pmode != ptr_mode)
38918 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
38919 emit_move_insn (tmp, this_mem);
38921 /* Adjust the this parameter. */
38922 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
38923 if (TARGET_64BIT
38924 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
38926 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
38927 emit_move_insn (tmp2, GEN_INT (vcall_offset));
38928 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
38931 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
38932 if (Pmode != ptr_mode)
38933 emit_insn (gen_addsi_1_zext (this_reg,
38934 gen_rtx_REG (ptr_mode,
38935 REGNO (this_reg)),
38936 vcall_mem));
38937 else
38938 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
38941 /* If necessary, drop THIS back to its stack slot. */
38942 if (this_reg && this_reg != this_param)
38943 emit_move_insn (this_param, this_reg);
38945 fnaddr = XEXP (DECL_RTL (function), 0);
38946 if (TARGET_64BIT)
38948 if (!flag_pic || targetm.binds_local_p (function)
38949 || TARGET_PECOFF)
38951 else
38953 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
38954 tmp = gen_rtx_CONST (Pmode, tmp);
38955 fnaddr = gen_const_mem (Pmode, tmp);
38958 else
38960 if (!flag_pic || targetm.binds_local_p (function))
38962 #if TARGET_MACHO
38963 else if (TARGET_MACHO)
38965 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
38966 fnaddr = XEXP (fnaddr, 0);
38968 #endif /* TARGET_MACHO */
38969 else
38971 tmp = gen_rtx_REG (Pmode, CX_REG);
38972 output_set_got (tmp, NULL_RTX);
38974 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
38975 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
38976 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
38977 fnaddr = gen_const_mem (Pmode, fnaddr);
38981 /* Our sibling call patterns do not allow memories, because we have no
38982 predicate that can distinguish between frame and non-frame memory.
38983 For our purposes here, we can get away with (ab)using a jump pattern,
38984 because we're going to do no optimization. */
38985 if (MEM_P (fnaddr))
38987 if (sibcall_insn_operand (fnaddr, word_mode))
38989 tmp = gen_rtx_CALL (VOIDmode, fnaddr, const0_rtx);
38990 tmp = emit_call_insn (tmp);
38991 SIBLING_CALL_P (tmp) = 1;
38993 else
38994 emit_jump_insn (gen_indirect_jump (fnaddr));
38996 else
38998 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
38999 fnaddr = legitimize_pic_address (fnaddr,
39000 gen_rtx_REG (Pmode, tmp_regno));
39002 if (!sibcall_insn_operand (fnaddr, word_mode))
39004 tmp = gen_rtx_REG (word_mode, tmp_regno);
39005 if (GET_MODE (fnaddr) != word_mode)
39006 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
39007 emit_move_insn (tmp, fnaddr);
39008 fnaddr = tmp;
39011 tmp = gen_rtx_MEM (QImode, fnaddr);
39012 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
39013 tmp = emit_call_insn (tmp);
39014 SIBLING_CALL_P (tmp) = 1;
39016 emit_barrier ();
39018 /* Emit just enough of rest_of_compilation to get the insns emitted.
39019 Note that use_thunk calls assemble_start_function et al. */
39020 insn = get_insns ();
39021 shorten_branches (insn);
39022 final_start_function (insn, file, 1);
39023 final (insn, file, 1);
39024 final_end_function ();
39027 static void
39028 x86_file_start (void)
39030 default_file_start ();
39031 if (TARGET_16BIT)
39032 fputs ("\t.code16gcc\n", asm_out_file);
39033 #if TARGET_MACHO
39034 darwin_file_start ();
39035 #endif
39036 if (X86_FILE_START_VERSION_DIRECTIVE)
39037 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
39038 if (X86_FILE_START_FLTUSED)
39039 fputs ("\t.global\t__fltused\n", asm_out_file);
39040 if (ix86_asm_dialect == ASM_INTEL)
39041 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
39045 x86_field_alignment (tree field, int computed)
39047 enum machine_mode mode;
39048 tree type = TREE_TYPE (field);
39050 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
39051 return computed;
39052 mode = TYPE_MODE (strip_array_types (type));
39053 if (mode == DFmode || mode == DCmode
39054 || GET_MODE_CLASS (mode) == MODE_INT
39055 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
39056 return MIN (32, computed);
39057 return computed;
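/* A minimal sketch of the effect above, assuming a 32-bit target without
   -malign-double: the natural 8-byte alignment of a double field is capped
   at 4 bytes, so the struct below (illustrative, not compiled) is laid out
   with size 12 and alignment 4 rather than 16 and 8.  */
#if 0
struct field_alignment_sketch
{
  int i;     /* offset 0 */
  double d;  /* offset 4; alignment capped at 4 by x86_field_alignment */
};
#endif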
39060 /* Output assembler code to FILE to increment profiler label # LABELNO
39061 for profiling a function entry. */
39062 void
39063 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
39065 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
39066 : MCOUNT_NAME);
39068 if (TARGET_64BIT)
39070 #ifndef NO_PROFILE_COUNTERS
39071 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
39072 #endif
39074 if (!TARGET_PECOFF && flag_pic)
39075 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
39076 else
39077 fprintf (file, "\tcall\t%s\n", mcount_name);
39079 else if (flag_pic)
39081 #ifndef NO_PROFILE_COUNTERS
39082 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
39083 LPREFIX, labelno);
39084 #endif
39085 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
39087 else
39089 #ifndef NO_PROFILE_COUNTERS
39090 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
39091 LPREFIX, labelno);
39092 #endif
39093 fprintf (file, "\tcall\t%s\n", mcount_name);
39097 /* We don't have exact information about the insn sizes, but we may assume
39098 quite safely that we are informed about all 1-byte insns and memory
39099 address sizes. This is enough to eliminate unnecessary padding in
39100 99% of cases. */
39102 static int
39103 min_insn_size (rtx insn)
39105 int l = 0, len;
39107 if (!INSN_P (insn) || !active_insn_p (insn))
39108 return 0;
39110 /* Discard alignments we've emitted and jump instructions. */
39111 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
39112 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
39113 return 0;
39115 /* Important case - calls are always 5 bytes.
39116 It is common to have many calls in a row. */
39117 if (CALL_P (insn)
39118 && symbolic_reference_mentioned_p (PATTERN (insn))
39119 && !SIBLING_CALL_P (insn))
39120 return 5;
39121 len = get_attr_length (insn);
39122 if (len <= 1)
39123 return 1;
39125 /* For normal instructions we rely on get_attr_length being exact,
39126 with a few exceptions. */
39127 if (!JUMP_P (insn))
39129 enum attr_type type = get_attr_type (insn);
39131 switch (type)
39133 case TYPE_MULTI:
39134 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
39135 || asm_noperands (PATTERN (insn)) >= 0)
39136 return 0;
39137 break;
39138 case TYPE_OTHER:
39139 case TYPE_FCMP:
39140 break;
39141 default:
39142 /* Otherwise trust get_attr_length. */
39143 return len;
39146 l = get_attr_length_address (insn);
39147 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
39148 l = 4;
39150 if (l)
39151 return 1+l;
39152 else
39153 return 2;
39156 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
39158 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
39159 window. */
39161 static void
39162 ix86_avoid_jump_mispredicts (void)
39164 rtx_insn *insn, *start = get_insns ();
39165 int nbytes = 0, njumps = 0;
39166 int isjump = 0;
39168 /* Look for all minimal intervals of instructions containing 4 jumps.
39169 The intervals are bounded by START and INSN. NBYTES is the total
39170 size of instructions in the interval including INSN and not including
39171 START. When NBYTES is smaller than 16 bytes, it is possible
39172 that the end of START and INSN ends up in the same 16-byte page.
39174 The smallest offset in the page at which INSN can start is the case where
39175 START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
39176 We add a p2align to the 16-byte window with max skip 15 - NBYTES + sizeof (INSN).
39178 Don't consider an asm goto to be a jump: while it can contain a jump, it
39179 doesn't have to, since control can transfer to its label(s) through other
39180 means; also, we estimate the minimum length of all asm stmts as 0. */
39181 for (insn = start; insn; insn = NEXT_INSN (insn))
39183 int min_size;
39185 if (LABEL_P (insn))
39187 int align = label_to_alignment (insn);
39188 int max_skip = label_to_max_skip (insn);
39190 if (max_skip > 15)
39191 max_skip = 15;
39192 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
39193 already in the current 16-byte page, because otherwise
39194 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
39195 bytes to reach a 16-byte boundary. */
39196 if (align <= 0
39197 || (align <= 3 && max_skip != (1 << align) - 1))
39198 max_skip = 0;
39199 if (dump_file)
39200 fprintf (dump_file, "Label %i with max_skip %i\n",
39201 INSN_UID (insn), max_skip);
39202 if (max_skip)
39204 while (nbytes + max_skip >= 16)
39206 start = NEXT_INSN (start);
39207 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
39208 || CALL_P (start))
39209 njumps--, isjump = 1;
39210 else
39211 isjump = 0;
39212 nbytes -= min_insn_size (start);
39215 continue;
39218 min_size = min_insn_size (insn);
39219 nbytes += min_size;
39220 if (dump_file)
39221 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
39222 INSN_UID (insn), min_size);
39223 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
39224 || CALL_P (insn))
39225 njumps++;
39226 else
39227 continue;
39229 while (njumps > 3)
39231 start = NEXT_INSN (start);
39232 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
39233 || CALL_P (start))
39234 njumps--, isjump = 1;
39235 else
39236 isjump = 0;
39237 nbytes -= min_insn_size (start);
39239 gcc_assert (njumps >= 0);
39240 if (dump_file)
39241 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
39242 INSN_UID (start), INSN_UID (insn), nbytes);
39244 if (njumps == 3 && isjump && nbytes < 16)
39246 int padsize = 15 - nbytes + min_insn_size (insn);
39248 if (dump_file)
39249 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
39250 INSN_UID (insn), padsize);
39251 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
39255 #endif
39257 /* AMD Athlon works faster
39258 when RET is not the destination of a conditional jump and is not directly
39259 preceded by another jump instruction. We avoid the penalty by inserting a
39260 NOP just before such RET instructions. */
39261 static void
39262 ix86_pad_returns (void)
39264 edge e;
39265 edge_iterator ei;
39267 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39269 basic_block bb = e->src;
39270 rtx_insn *ret = BB_END (bb);
39271 rtx_insn *prev;
39272 bool replace = false;
39274 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
39275 || optimize_bb_for_size_p (bb))
39276 continue;
39277 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
39278 if (active_insn_p (prev) || LABEL_P (prev))
39279 break;
39280 if (prev && LABEL_P (prev))
39282 edge e;
39283 edge_iterator ei;
39285 FOR_EACH_EDGE (e, ei, bb->preds)
39286 if (EDGE_FREQUENCY (e) && e->src->index >= 0
39287 && !(e->flags & EDGE_FALLTHRU))
39289 replace = true;
39290 break;
39293 if (!replace)
39295 prev = prev_active_insn (ret);
39296 if (prev
39297 && ((JUMP_P (prev) && any_condjump_p (prev))
39298 || CALL_P (prev)))
39299 replace = true;
39300 /* Empty functions get a branch mispredict even when
39301 the jump destination is not visible to us. */
39302 if (!prev && !optimize_function_for_size_p (cfun))
39303 replace = true;
39305 if (replace)
39307 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
39308 delete_insn (ret);
39313 /* Count the minimum number of instructions in BB. Return 4 if the
39314 number of instructions >= 4. */
39316 static int
39317 ix86_count_insn_bb (basic_block bb)
39319 rtx_insn *insn;
39320 int insn_count = 0;
39322 /* Count number of instructions in this block. Return 4 if the number
39323 of instructions >= 4. */
39324 FOR_BB_INSNS (bb, insn)
39326 /* This only happens in exit blocks. */
39327 if (JUMP_P (insn)
39328 && ANY_RETURN_P (PATTERN (insn)))
39329 break;
39331 if (NONDEBUG_INSN_P (insn)
39332 && GET_CODE (PATTERN (insn)) != USE
39333 && GET_CODE (PATTERN (insn)) != CLOBBER)
39335 insn_count++;
39336 if (insn_count >= 4)
39337 return insn_count;
39341 return insn_count;
39345 /* Count the minimum number of instructions in code path in BB.
39346 Return 4 if the number of instructions >= 4. */
39348 static int
39349 ix86_count_insn (basic_block bb)
39351 edge e;
39352 edge_iterator ei;
39353 int min_prev_count;
39355 /* Only bother counting instructions along paths with no
39356 more than 2 basic blocks between entry and exit. Given
39357 that BB has an edge to exit, determine if a predecessor
39358 of BB has an edge from entry. If so, compute the number
39359 of instructions in the predecessor block. If there
39360 happen to be multiple such blocks, compute the minimum. */
39361 min_prev_count = 4;
39362 FOR_EACH_EDGE (e, ei, bb->preds)
39364 edge prev_e;
39365 edge_iterator prev_ei;
39367 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
39369 min_prev_count = 0;
39370 break;
39372 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
39374 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
39376 int count = ix86_count_insn_bb (e->src);
39377 if (count < min_prev_count)
39378 min_prev_count = count;
39379 break;
39384 if (min_prev_count < 4)
39385 min_prev_count += ix86_count_insn_bb (bb);
39387 return min_prev_count;
39390 /* Pad short function to 4 instructions. */
39392 static void
39393 ix86_pad_short_function (void)
39395 edge e;
39396 edge_iterator ei;
39398 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39400 rtx_insn *ret = BB_END (e->src);
39401 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
39403 int insn_count = ix86_count_insn (e->src);
39405 /* Pad short function. */
39406 if (insn_count < 4)
39408 rtx_insn *insn = ret;
39410 /* Find epilogue. */
39411 while (insn
39412 && (!NOTE_P (insn)
39413 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
39414 insn = PREV_INSN (insn);
39416 if (!insn)
39417 insn = ret;
39419 /* Two NOPs count as one instruction. */
39420 insn_count = 2 * (4 - insn_count);
39421 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
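/* For instance (illustrative), a function whose body is a single active
   insn has insn_count == 1, so 2 * (4 - 1) == 6 NOPs are emitted before the
   epilogue: with two NOPs counted as one instruction, the function is
   padded up to the equivalent of four instructions.  */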
39427 /* Fix up a Windows system unwinder issue. If an EH region falls through into
39428 the epilogue, the Windows system unwinder will apply epilogue logic and
39429 produce incorrect offsets. This can be avoided by adding a nop between
39430 the last insn that can throw and the first insn of the epilogue. */
39432 static void
39433 ix86_seh_fixup_eh_fallthru (void)
39435 edge e;
39436 edge_iterator ei;
39438 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39440 rtx_insn *insn, *next;
39442 /* Find the beginning of the epilogue. */
39443 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
39444 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
39445 break;
39446 if (insn == NULL)
39447 continue;
39449 /* We only care about preceding insns that can throw. */
39450 insn = prev_active_insn (insn);
39451 if (insn == NULL || !can_throw_internal (insn))
39452 continue;
39454 /* Do not separate calls from their debug information. */
39455 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
39456 if (NOTE_P (next)
39457 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
39458 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION))
39459 insn = next;
39460 else
39461 break;
39463 emit_insn_after (gen_nops (const1_rtx), insn);
39467 /* Implement machine specific optimizations. We implement padding of returns
39468 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
39469 static void
39470 ix86_reorg (void)
39472 /* We are freeing block_for_insn in the toplev to keep compatibility
39473 with old MDEP_REORGS that are not CFG based. Recompute it now. */
39474 compute_bb_for_insn ();
39476 if (TARGET_SEH && current_function_has_exception_handlers ())
39477 ix86_seh_fixup_eh_fallthru ();
39479 if (optimize && optimize_function_for_speed_p (cfun))
39481 if (TARGET_PAD_SHORT_FUNCTION)
39482 ix86_pad_short_function ();
39483 else if (TARGET_PAD_RETURNS)
39484 ix86_pad_returns ();
39485 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
39486 if (TARGET_FOUR_JUMP_LIMIT)
39487 ix86_avoid_jump_mispredicts ();
39488 #endif
39492 /* Return nonzero when a QImode register that must be represented via a REX
39493 prefix is used. */
39494 bool
39495 x86_extended_QIreg_mentioned_p (rtx insn)
39497 int i;
39498 extract_insn_cached (insn);
39499 for (i = 0; i < recog_data.n_operands; i++)
39500 if (GENERAL_REG_P (recog_data.operand[i])
39501 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
39502 return true;
39503 return false;
39506 /* Return nonzero when P points to a register encoded via a REX prefix.
39507 Called via for_each_rtx. */
39508 static int
39509 extended_reg_mentioned_1 (rtx *p, void *)
39511 unsigned int regno;
39512 if (!REG_P (*p))
39513 return 0;
39514 regno = REGNO (*p);
39515 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
39518 /* Return true when INSN mentions a register that must be encoded using a REX
39519 prefix. */
39520 bool
39521 x86_extended_reg_mentioned_p (rtx insn)
39523 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
39524 extended_reg_mentioned_1, NULL);
39527 /* If profitable, negate (without causing overflow) integer constant
39528 of mode MODE at location LOC. Return true in this case. */
39529 bool
39530 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
39532 HOST_WIDE_INT val;
39534 if (!CONST_INT_P (*loc))
39535 return false;
39537 switch (mode)
39539 case DImode:
39540 /* DImode x86_64 constants must fit in 32 bits. */
39541 gcc_assert (x86_64_immediate_operand (*loc, mode));
39543 mode = SImode;
39544 break;
39546 case SImode:
39547 case HImode:
39548 case QImode:
39549 break;
39551 default:
39552 gcc_unreachable ();
39555 /* Avoid overflows. */
39556 if (mode_signbit_p (mode, *loc))
39557 return false;
39559 val = INTVAL (*loc);
39561 /* Make things pretty by emitting `subl $4,%eax' rather than `addl $-4,%eax'.
39562 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
39563 if ((val < 0 && val != -128)
39564 || val == 128)
39566 *loc = GEN_INT (-val);
39567 return true;
39570 return false;
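/* Worked cases of the rule above (illustrative): a constant of -4 is
   replaced by 4, so the add pattern can emit `subl $4, %eax' instead of
   `addl $-4, %eax'; 128 is replaced by -128 because -128 still fits the
   sign-extended 8-bit immediate encoding while 128 does not; -128 itself is
   left alone, since negating it to 128 would lose that encoding.  */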
39573 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
39574 optabs would emit if we didn't have TFmode patterns. */
39576 void
39577 x86_emit_floatuns (rtx operands[2])
39579 rtx_code_label *neglab, *donelab;
39580 rtx i0, i1, f0, in, out;
39581 enum machine_mode mode, inmode;
39583 inmode = GET_MODE (operands[1]);
39584 gcc_assert (inmode == SImode || inmode == DImode);
39586 out = operands[0];
39587 in = force_reg (inmode, operands[1]);
39588 mode = GET_MODE (out);
39589 neglab = gen_label_rtx ();
39590 donelab = gen_label_rtx ();
39591 f0 = gen_reg_rtx (mode);
39593 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
39595 expand_float (out, in, 0);
39597 emit_jump_insn (gen_jump (donelab));
39598 emit_barrier ();
39600 emit_label (neglab);
39602 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
39603 1, OPTAB_DIRECT);
39604 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
39605 1, OPTAB_DIRECT);
39606 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
39608 expand_float (f0, i0, 0);
39610 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
39612 emit_label (donelab);
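/* The sequence built above corresponds to the following C sketch (assuming
   a DImode input converted to double); it is illustrative only, not the
   code GCC emits verbatim.  */
#if 0
static double
floatuns_sketch (unsigned long long x)
{
  if ((long long) x >= 0)
    return (double) (long long) x;      /* Signed conversion suffices.  */
  /* Halve the value, folding the low bit back in so the final rounding is
     unaffected, convert, then double the result.  */
  unsigned long long half = (x >> 1) | (x & 1);
  double f = (double) (long long) half;
  return f + f;
}
#endif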
39615 /* AVX512F does support 64-byte integer vector operations,
39616 thus the longest vector we are faced with is V64QImode. */
39617 #define MAX_VECT_LEN 64
39619 struct expand_vec_perm_d
39621 rtx target, op0, op1;
39622 unsigned char perm[MAX_VECT_LEN];
39623 enum machine_mode vmode;
39624 unsigned char nelt;
39625 bool one_operand_p;
39626 bool testing_p;
39629 static bool canonicalize_perm (struct expand_vec_perm_d *d);
39630 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
39631 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
39633 /* Get a vector mode of the same size as the original but with elements
39634 twice as wide. This is only guaranteed to apply to integral vectors. */
39636 static inline enum machine_mode
39637 get_mode_wider_vector (enum machine_mode o)
39639 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
39640 enum machine_mode n = GET_MODE_WIDER_MODE (o);
39641 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
39642 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
39643 return n;
39646 /* A subroutine of ix86_expand_vector_init_duplicate. Tries to
39647 fill target with val via vec_duplicate. */
39649 static bool
39650 ix86_vector_duplicate_value (enum machine_mode mode, rtx target, rtx val)
39652 bool ok;
39653 rtx_insn *insn;
39654 rtx dup;
39656 /* First attempt to recognize VAL as-is. */
39657 dup = gen_rtx_VEC_DUPLICATE (mode, val);
39658 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
39659 if (recog_memoized (insn) < 0)
39661 rtx_insn *seq;
39662 /* If that fails, force VAL into a register. */
39664 start_sequence ();
39665 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
39666 seq = get_insns ();
39667 end_sequence ();
39668 if (seq)
39669 emit_insn_before (seq, insn);
39671 ok = recog_memoized (insn) >= 0;
39672 gcc_assert (ok);
39674 return true;
39677 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
39678 with all elements equal to VAR. Return true if successful. */
39680 static bool
39681 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
39682 rtx target, rtx val)
39684 bool ok;
39686 switch (mode)
39688 case V2SImode:
39689 case V2SFmode:
39690 if (!mmx_ok)
39691 return false;
39692 /* FALLTHRU */
39694 case V4DFmode:
39695 case V4DImode:
39696 case V8SFmode:
39697 case V8SImode:
39698 case V2DFmode:
39699 case V2DImode:
39700 case V4SFmode:
39701 case V4SImode:
39702 case V16SImode:
39703 case V8DImode:
39704 case V16SFmode:
39705 case V8DFmode:
39706 return ix86_vector_duplicate_value (mode, target, val);
39708 case V4HImode:
39709 if (!mmx_ok)
39710 return false;
39711 if (TARGET_SSE || TARGET_3DNOW_A)
39713 rtx x;
39715 val = gen_lowpart (SImode, val);
39716 x = gen_rtx_TRUNCATE (HImode, val);
39717 x = gen_rtx_VEC_DUPLICATE (mode, x);
39718 emit_insn (gen_rtx_SET (VOIDmode, target, x));
39719 return true;
39721 goto widen;
39723 case V8QImode:
39724 if (!mmx_ok)
39725 return false;
39726 goto widen;
39728 case V8HImode:
39729 if (TARGET_SSE2)
39731 struct expand_vec_perm_d dperm;
39732 rtx tmp1, tmp2;
39734 permute:
39735 memset (&dperm, 0, sizeof (dperm));
39736 dperm.target = target;
39737 dperm.vmode = mode;
39738 dperm.nelt = GET_MODE_NUNITS (mode);
39739 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
39740 dperm.one_operand_p = true;
39742 /* Extend to SImode using a paradoxical SUBREG. */
39743 tmp1 = gen_reg_rtx (SImode);
39744 emit_move_insn (tmp1, gen_lowpart (SImode, val));
39746 /* Insert the SImode value as low element of a V4SImode vector. */
39747 tmp2 = gen_reg_rtx (V4SImode);
39748 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
39749 emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
39751 ok = (expand_vec_perm_1 (&dperm)
39752 || expand_vec_perm_broadcast_1 (&dperm));
39753 gcc_assert (ok);
39754 return ok;
39756 goto widen;
39758 case V16QImode:
39759 if (TARGET_SSE2)
39760 goto permute;
39761 goto widen;
39763 widen:
39764 /* Replicate the value once into the next wider mode and recurse. */
39766 enum machine_mode smode, wsmode, wvmode;
39767 rtx x;
39769 smode = GET_MODE_INNER (mode);
39770 wvmode = get_mode_wider_vector (mode);
39771 wsmode = GET_MODE_INNER (wvmode);
39773 val = convert_modes (wsmode, smode, val, true);
39774 x = expand_simple_binop (wsmode, ASHIFT, val,
39775 GEN_INT (GET_MODE_BITSIZE (smode)),
39776 NULL_RTX, 1, OPTAB_LIB_WIDEN);
39777 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
39779 x = gen_reg_rtx (wvmode);
39780 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
39781 gcc_assert (ok);
39782 emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
39783 return ok;
39786 case V16HImode:
39787 case V32QImode:
39789 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
39790 rtx x = gen_reg_rtx (hvmode);
39792 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
39793 gcc_assert (ok);
39795 x = gen_rtx_VEC_CONCAT (mode, x, x);
39796 emit_insn (gen_rtx_SET (VOIDmode, target, x));
39798 return true;
39800 default:
39801 return false;
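/* Sketch of the widening fallback above, assuming a V8QImode broadcast of
   the QImode value 0x5a: the scalar is first replicated into HImode as
   0x5a5a via val | (val << 8), the function then recurses to broadcast that
   HImode value across V4HImode, and the result is reused as V8QImode
   through gen_lowpart.  */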
39805 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
39806 whose ONE_VAR element is VAR, and other elements are zero. Return true
39807 if successful. */
39809 static bool
39810 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
39811 rtx target, rtx var, int one_var)
39813 enum machine_mode vsimode;
39814 rtx new_target;
39815 rtx x, tmp;
39816 bool use_vector_set = false;
39818 switch (mode)
39820 case V2DImode:
39821 /* For SSE4.1, we normally use vector set. But if the second
39822 element is zero and inter-unit moves are OK, we use movq
39823 instead. */
39824 use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
39825 && !(TARGET_INTER_UNIT_MOVES_TO_VEC
39826 && one_var == 0));
39827 break;
39828 case V16QImode:
39829 case V4SImode:
39830 case V4SFmode:
39831 use_vector_set = TARGET_SSE4_1;
39832 break;
39833 case V8HImode:
39834 use_vector_set = TARGET_SSE2;
39835 break;
39836 case V4HImode:
39837 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
39838 break;
39839 case V32QImode:
39840 case V16HImode:
39841 case V8SImode:
39842 case V8SFmode:
39843 case V4DFmode:
39844 use_vector_set = TARGET_AVX;
39845 break;
39846 case V4DImode:
39847 /* Use ix86_expand_vector_set in 64bit mode only. */
39848 use_vector_set = TARGET_AVX && TARGET_64BIT;
39849 break;
39850 default:
39851 break;
39854 if (use_vector_set)
39856 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
39857 var = force_reg (GET_MODE_INNER (mode), var);
39858 ix86_expand_vector_set (mmx_ok, target, var, one_var);
39859 return true;
39862 switch (mode)
39864 case V2SFmode:
39865 case V2SImode:
39866 if (!mmx_ok)
39867 return false;
39868 /* FALLTHRU */
39870 case V2DFmode:
39871 case V2DImode:
39872 if (one_var != 0)
39873 return false;
39874 var = force_reg (GET_MODE_INNER (mode), var);
39875 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
39876 emit_insn (gen_rtx_SET (VOIDmode, target, x));
39877 return true;
39879 case V4SFmode:
39880 case V4SImode:
39881 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
39882 new_target = gen_reg_rtx (mode);
39883 else
39884 new_target = target;
39885 var = force_reg (GET_MODE_INNER (mode), var);
39886 x = gen_rtx_VEC_DUPLICATE (mode, var);
39887 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
39888 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
39889 if (one_var != 0)
39891 /* We need to shuffle the value to the correct position, so
39892 create a new pseudo to store the intermediate result. */
39894 /* With SSE2, we can use the integer shuffle insns. */
39895 if (mode != V4SFmode && TARGET_SSE2)
39897 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
39898 const1_rtx,
39899 GEN_INT (one_var == 1 ? 0 : 1),
39900 GEN_INT (one_var == 2 ? 0 : 1),
39901 GEN_INT (one_var == 3 ? 0 : 1)));
39902 if (target != new_target)
39903 emit_move_insn (target, new_target);
39904 return true;
39907 /* Otherwise convert the intermediate result to V4SFmode and
39908 use the SSE1 shuffle instructions. */
39909 if (mode != V4SFmode)
39911 tmp = gen_reg_rtx (V4SFmode);
39912 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
39914 else
39915 tmp = new_target;
39917 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
39918 const1_rtx,
39919 GEN_INT (one_var == 1 ? 0 : 1),
39920 GEN_INT (one_var == 2 ? 0+4 : 1+4),
39921 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
39923 if (mode != V4SFmode)
39924 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
39925 else if (tmp != target)
39926 emit_move_insn (target, tmp);
39928 else if (target != new_target)
39929 emit_move_insn (target, new_target);
39930 return true;
39932 case V8HImode:
39933 case V16QImode:
39934 vsimode = V4SImode;
39935 goto widen;
39936 case V4HImode:
39937 case V8QImode:
39938 if (!mmx_ok)
39939 return false;
39940 vsimode = V2SImode;
39941 goto widen;
39942 widen:
39943 if (one_var != 0)
39944 return false;
39946 /* Zero extend the variable element to SImode and recurse. */
39947 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
39949 x = gen_reg_rtx (vsimode);
39950 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
39951 var, one_var))
39952 gcc_unreachable ();
39954 emit_move_insn (target, gen_lowpart (mode, x));
39955 return true;
39957 default:
39958 return false;
39962 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
39963 consisting of the values in VALS. It is known that all elements
39964 except ONE_VAR are constants. Return true if successful. */
39966 static bool
39967 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
39968 rtx target, rtx vals, int one_var)
39970 rtx var = XVECEXP (vals, 0, one_var);
39971 enum machine_mode wmode;
39972 rtx const_vec, x;
39974 const_vec = copy_rtx (vals);
39975 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
39976 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
39978 switch (mode)
39980 case V2DFmode:
39981 case V2DImode:
39982 case V2SFmode:
39983 case V2SImode:
39984 /* For the two element vectors, it's just as easy to use
39985 the general case. */
39986 return false;
39988 case V4DImode:
39989 /* Use ix86_expand_vector_set in 64bit mode only. */
39990 if (!TARGET_64BIT)
39991 return false;
39992 case V4DFmode:
39993 case V8SFmode:
39994 case V8SImode:
39995 case V16HImode:
39996 case V32QImode:
39997 case V4SFmode:
39998 case V4SImode:
39999 case V8HImode:
40000 case V4HImode:
40001 break;
40003 case V16QImode:
40004 if (TARGET_SSE4_1)
40005 break;
40006 wmode = V8HImode;
40007 goto widen;
40008 case V8QImode:
40009 wmode = V4HImode;
40010 goto widen;
40011 widen:
40012 /* There's no way to set one QImode entry easily. Combine
40013 the variable value with its adjacent constant value, and
40014 promote to an HImode set. */
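      /* For example, setting element 3 of a V8QImode vector pairs it
	 with the constant in element 2: the variable is zero-extended
	 to HImode and shifted into the high byte (odd index), the
	 neighbouring constant supplies the low byte, and the combined
	 value is stored at position 3 >> 1 == 1 of the V4HImode view.  */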
40015 x = XVECEXP (vals, 0, one_var ^ 1);
40016 if (one_var & 1)
40018 var = convert_modes (HImode, QImode, var, true);
40019 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
40020 NULL_RTX, 1, OPTAB_LIB_WIDEN);
40021 x = GEN_INT (INTVAL (x) & 0xff);
40023 else
40025 var = convert_modes (HImode, QImode, var, true);
40026 x = gen_int_mode (INTVAL (x) << 8, HImode);
40028 if (x != const0_rtx)
40029 var = expand_simple_binop (HImode, IOR, var, x, var,
40030 1, OPTAB_LIB_WIDEN);
40032 x = gen_reg_rtx (wmode);
40033 emit_move_insn (x, gen_lowpart (wmode, const_vec));
40034 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
40036 emit_move_insn (target, gen_lowpart (mode, x));
40037 return true;
40039 default:
40040 return false;
40043 emit_move_insn (target, const_vec);
40044 ix86_expand_vector_set (mmx_ok, target, var, one_var);
40045 return true;
40048 /* A subroutine of ix86_expand_vector_init_general. Use vector
40049 concatenate to handle the most general case: all values variable,
40050 and none identical. */
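/* For example, a V8SFmode vector of eight variable elements is built
   bottom-up: four V2SFmode concatenations of adjacent scalar pairs,
   two V4SFmode concatenations of those, and a final concatenation of
   the two halves into the V8SFmode target.  */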
40052 static void
40053 ix86_expand_vector_init_concat (enum machine_mode mode,
40054 rtx target, rtx *ops, int n)
40056 enum machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
40057 rtx first[16], second[8], third[4];
40058 rtvec v;
40059 int i, j;
40061 switch (n)
40063 case 2:
40064 switch (mode)
40066 case V16SImode:
40067 cmode = V8SImode;
40068 break;
40069 case V16SFmode:
40070 cmode = V8SFmode;
40071 break;
40072 case V8DImode:
40073 cmode = V4DImode;
40074 break;
40075 case V8DFmode:
40076 cmode = V4DFmode;
40077 break;
40078 case V8SImode:
40079 cmode = V4SImode;
40080 break;
40081 case V8SFmode:
40082 cmode = V4SFmode;
40083 break;
40084 case V4DImode:
40085 cmode = V2DImode;
40086 break;
40087 case V4DFmode:
40088 cmode = V2DFmode;
40089 break;
40090 case V4SImode:
40091 cmode = V2SImode;
40092 break;
40093 case V4SFmode:
40094 cmode = V2SFmode;
40095 break;
40096 case V2DImode:
40097 cmode = DImode;
40098 break;
40099 case V2SImode:
40100 cmode = SImode;
40101 break;
40102 case V2DFmode:
40103 cmode = DFmode;
40104 break;
40105 case V2SFmode:
40106 cmode = SFmode;
40107 break;
40108 default:
40109 gcc_unreachable ();
40112 if (!register_operand (ops[1], cmode))
40113 ops[1] = force_reg (cmode, ops[1]);
40114 if (!register_operand (ops[0], cmode))
40115 ops[0] = force_reg (cmode, ops[0]);
40116 emit_insn (gen_rtx_SET (VOIDmode, target,
40117 gen_rtx_VEC_CONCAT (mode, ops[0],
40118 ops[1])));
40119 break;
40121 case 4:
40122 switch (mode)
40124 case V4DImode:
40125 cmode = V2DImode;
40126 break;
40127 case V4DFmode:
40128 cmode = V2DFmode;
40129 break;
40130 case V4SImode:
40131 cmode = V2SImode;
40132 break;
40133 case V4SFmode:
40134 cmode = V2SFmode;
40135 break;
40136 default:
40137 gcc_unreachable ();
40139 goto half;
40141 case 8:
40142 switch (mode)
40144 case V8DImode:
40145 cmode = V2DImode;
40146 hmode = V4DImode;
40147 break;
40148 case V8DFmode:
40149 cmode = V2DFmode;
40150 hmode = V4DFmode;
40151 break;
40152 case V8SImode:
40153 cmode = V2SImode;
40154 hmode = V4SImode;
40155 break;
40156 case V8SFmode:
40157 cmode = V2SFmode;
40158 hmode = V4SFmode;
40159 break;
40160 default:
40161 gcc_unreachable ();
40163 goto half;
40165 case 16:
40166 switch (mode)
40168 case V16SImode:
40169 cmode = V2SImode;
40170 hmode = V4SImode;
40171 gmode = V8SImode;
40172 break;
40173 case V16SFmode:
40174 cmode = V2SFmode;
40175 hmode = V4SFmode;
40176 gmode = V8SFmode;
40177 break;
40178 default:
40179 gcc_unreachable ();
40181 goto half;
40183 half:
40184 /* FIXME: We process inputs backward to help RA. PR 36222. */
40185 i = n - 1;
40186 j = (n >> 1) - 1;
40187 for (; i > 0; i -= 2, j--)
40189 first[j] = gen_reg_rtx (cmode);
40190 v = gen_rtvec (2, ops[i - 1], ops[i]);
40191 ix86_expand_vector_init (false, first[j],
40192 gen_rtx_PARALLEL (cmode, v));
40195 n >>= 1;
40196 if (n > 4)
40198 gcc_assert (hmode != VOIDmode);
40199 gcc_assert (gmode != VOIDmode);
40200 for (i = j = 0; i < n; i += 2, j++)
40202 second[j] = gen_reg_rtx (hmode);
40203 ix86_expand_vector_init_concat (hmode, second [j],
40204 &first [i], 2);
40206 n >>= 1;
40207 for (i = j = 0; i < n; i += 2, j++)
40209 third[j] = gen_reg_rtx (gmode);
40210 ix86_expand_vector_init_concat (gmode, third[j],
40211 &second[i], 2);
40213 n >>= 1;
40214 ix86_expand_vector_init_concat (mode, target, third, n);
40216 else if (n > 2)
40218 gcc_assert (hmode != VOIDmode);
40219 for (i = j = 0; i < n; i += 2, j++)
40221 second[j] = gen_reg_rtx (hmode);
40222 ix86_expand_vector_init_concat (hmode, second [j],
40223 &first [i], 2);
40225 n >>= 1;
40226 ix86_expand_vector_init_concat (mode, target, second, n);
40228 else
40229 ix86_expand_vector_init_concat (mode, target, first, n);
40230 break;
40232 default:
40233 gcc_unreachable ();
40237 /* A subroutine of ix86_expand_vector_init_general. Use vector
40238 interleave to handle the most general case: all values variable,
40239 and none identical. */
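/* For example, a V16QImode vector is built by packing pairs of byte
   elements into the low two bytes of a temporary vector (vec_set),
   viewing the temporaries as V8HImode, and then combining them with
   successive low interleaves (punpcklwd, punpckldq, punpcklqdq).  */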
40241 static void
40242 ix86_expand_vector_init_interleave (enum machine_mode mode,
40243 rtx target, rtx *ops, int n)
40245 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
40246 int i, j;
40247 rtx op0, op1;
40248 rtx (*gen_load_even) (rtx, rtx, rtx);
40249 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
40250 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
40252 switch (mode)
40254 case V8HImode:
40255 gen_load_even = gen_vec_setv8hi;
40256 gen_interleave_first_low = gen_vec_interleave_lowv4si;
40257 gen_interleave_second_low = gen_vec_interleave_lowv2di;
40258 inner_mode = HImode;
40259 first_imode = V4SImode;
40260 second_imode = V2DImode;
40261 third_imode = VOIDmode;
40262 break;
40263 case V16QImode:
40264 gen_load_even = gen_vec_setv16qi;
40265 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
40266 gen_interleave_second_low = gen_vec_interleave_lowv4si;
40267 inner_mode = QImode;
40268 first_imode = V8HImode;
40269 second_imode = V4SImode;
40270 third_imode = V2DImode;
40271 break;
40272 default:
40273 gcc_unreachable ();
40276 for (i = 0; i < n; i++)
40278 /* Extend the odd element to SImode using a paradoxical SUBREG. */
40279 op0 = gen_reg_rtx (SImode);
40280 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
40282 /* Insert the SImode value as low element of a V4SImode vector. */
40283 op1 = gen_reg_rtx (V4SImode);
40284 op0 = gen_rtx_VEC_MERGE (V4SImode,
40285 gen_rtx_VEC_DUPLICATE (V4SImode,
40286 op0),
40287 CONST0_RTX (V4SImode),
40288 const1_rtx);
40289 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
40291 /* Cast the V4SImode vector back to a vector in the original mode. */
40292 op0 = gen_reg_rtx (mode);
40293 emit_move_insn (op0, gen_lowpart (mode, op1));
40295 /* Load even elements into the second position. */
40296 emit_insn (gen_load_even (op0,
40297 force_reg (inner_mode,
40298 ops [i + i + 1]),
40299 const1_rtx));
40301 /* Cast vector to FIRST_IMODE vector. */
40302 ops[i] = gen_reg_rtx (first_imode);
40303 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
40306 /* Interleave low FIRST_IMODE vectors. */
40307 for (i = j = 0; i < n; i += 2, j++)
40309 op0 = gen_reg_rtx (first_imode);
40310 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
40312 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
40313 ops[j] = gen_reg_rtx (second_imode);
40314 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
40317 /* Interleave low SECOND_IMODE vectors. */
40318 switch (second_imode)
40320 case V4SImode:
40321 for (i = j = 0; i < n / 2; i += 2, j++)
40323 op0 = gen_reg_rtx (second_imode);
40324 emit_insn (gen_interleave_second_low (op0, ops[i],
40325 ops[i + 1]));
40327 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
40328 vector. */
40329 ops[j] = gen_reg_rtx (third_imode);
40330 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
40332 second_imode = V2DImode;
40333 gen_interleave_second_low = gen_vec_interleave_lowv2di;
40334 /* FALLTHRU */
40336 case V2DImode:
40337 op0 = gen_reg_rtx (second_imode);
40338 emit_insn (gen_interleave_second_low (op0, ops[0],
40339 ops[1]));
40341 /* Cast the SECOND_IMODE vector back to a vector in the original
40342 mode. */
40343 emit_insn (gen_rtx_SET (VOIDmode, target,
40344 gen_lowpart (mode, op0)));
40345 break;
40347 default:
40348 gcc_unreachable ();
40352 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
40353 all values variable, and none identical. */
40355 static void
40356 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
40357 rtx target, rtx vals)
40359 rtx ops[64], op0, op1;
40360 enum machine_mode half_mode = VOIDmode;
40361 int n, i;
40363 switch (mode)
40365 case V2SFmode:
40366 case V2SImode:
40367 if (!mmx_ok && !TARGET_SSE)
40368 break;
40369 /* FALLTHRU */
40371 case V16SImode:
40372 case V16SFmode:
40373 case V8DFmode:
40374 case V8DImode:
40375 case V8SFmode:
40376 case V8SImode:
40377 case V4DFmode:
40378 case V4DImode:
40379 case V4SFmode:
40380 case V4SImode:
40381 case V2DFmode:
40382 case V2DImode:
40383 n = GET_MODE_NUNITS (mode);
40384 for (i = 0; i < n; i++)
40385 ops[i] = XVECEXP (vals, 0, i);
40386 ix86_expand_vector_init_concat (mode, target, ops, n);
40387 return;
40389 case V32QImode:
40390 half_mode = V16QImode;
40391 goto half;
40393 case V16HImode:
40394 half_mode = V8HImode;
40395 goto half;
40397 half:
40398 n = GET_MODE_NUNITS (mode);
40399 for (i = 0; i < n; i++)
40400 ops[i] = XVECEXP (vals, 0, i);
40401 op0 = gen_reg_rtx (half_mode);
40402 op1 = gen_reg_rtx (half_mode);
40403 ix86_expand_vector_init_interleave (half_mode, op0, ops,
40404 n >> 2);
40405 ix86_expand_vector_init_interleave (half_mode, op1,
40406 &ops [n >> 1], n >> 2);
40407 emit_insn (gen_rtx_SET (VOIDmode, target,
40408 gen_rtx_VEC_CONCAT (mode, op0, op1)));
40409 return;
40411 case V16QImode:
40412 if (!TARGET_SSE4_1)
40413 break;
40414 /* FALLTHRU */
40416 case V8HImode:
40417 if (!TARGET_SSE2)
40418 break;
40420 /* Don't use ix86_expand_vector_init_interleave if we can't
40421 move from GPR to SSE register directly. */
40422 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
40423 break;
40425 n = GET_MODE_NUNITS (mode);
40426 for (i = 0; i < n; i++)
40427 ops[i] = XVECEXP (vals, 0, i);
40428 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
40429 return;
40431 case V4HImode:
40432 case V8QImode:
40433 break;
40435 default:
40436 gcc_unreachable ();
40440 int i, j, n_elts, n_words, n_elt_per_word;
40441 enum machine_mode inner_mode;
40442 rtx words[4], shift;
40444 inner_mode = GET_MODE_INNER (mode);
40445 n_elts = GET_MODE_NUNITS (mode);
40446 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
40447 n_elt_per_word = n_elts / n_words;
40448 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
40450 for (i = 0; i < n_words; ++i)
40452 rtx word = NULL_RTX;
40454 for (j = 0; j < n_elt_per_word; ++j)
40456 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
40457 elt = convert_modes (word_mode, inner_mode, elt, true);
40459 if (j == 0)
40460 word = elt;
40461 else
40463 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
40464 word, 1, OPTAB_LIB_WIDEN);
40465 word = expand_simple_binop (word_mode, IOR, word, elt,
40466 word, 1, OPTAB_LIB_WIDEN);
40470 words[i] = word;
40473 if (n_words == 1)
40474 emit_move_insn (target, gen_lowpart (mode, words[0]));
40475 else if (n_words == 2)
40477 rtx tmp = gen_reg_rtx (mode);
40478 emit_clobber (tmp);
40479 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
40480 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
40481 emit_move_insn (target, tmp);
40483 else if (n_words == 4)
40485 rtx tmp = gen_reg_rtx (V4SImode);
40486 gcc_assert (word_mode == SImode);
40487 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
40488 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
40489 emit_move_insn (target, gen_lowpart (mode, tmp));
40491 else
40492 gcc_unreachable ();
40496 /* Initialize vector TARGET via VALS. Suppress the use of MMX
40497 instructions unless MMX_OK is true. */
40499 void
40500 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
40502 enum machine_mode mode = GET_MODE (target);
40503 enum machine_mode inner_mode = GET_MODE_INNER (mode);
40504 int n_elts = GET_MODE_NUNITS (mode);
40505 int n_var = 0, one_var = -1;
40506 bool all_same = true, all_const_zero = true;
40507 int i;
40508 rtx x;
40510 for (i = 0; i < n_elts; ++i)
40512 x = XVECEXP (vals, 0, i);
40513 if (!(CONST_INT_P (x)
40514 || GET_CODE (x) == CONST_DOUBLE
40515 || GET_CODE (x) == CONST_FIXED))
40516 n_var++, one_var = i;
40517 else if (x != CONST0_RTX (inner_mode))
40518 all_const_zero = false;
40519 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
40520 all_same = false;
40523 /* Constants are best loaded from the constant pool. */
40524 if (n_var == 0)
40526 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
40527 return;
40530 /* If all values are identical, broadcast the value. */
40531 if (all_same
40532 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
40533 XVECEXP (vals, 0, 0)))
40534 return;
40536 /* Values where only one field is non-constant are best loaded from
40537 the pool and overwritten via move later. */
40538 if (n_var == 1)
40540 if (all_const_zero
40541 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
40542 XVECEXP (vals, 0, one_var),
40543 one_var))
40544 return;
40546 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
40547 return;
40550 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
40553 void
40554 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
40556 enum machine_mode mode = GET_MODE (target);
40557 enum machine_mode inner_mode = GET_MODE_INNER (mode);
40558 enum machine_mode half_mode;
40559 bool use_vec_merge = false;
40560 rtx tmp;
40561 static rtx (*gen_extract[6][2]) (rtx, rtx)
40563 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
40564 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
40565 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
40566 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
40567 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
40568 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
40570 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
40572 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
40573 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
40574 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
40575 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
40576 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
40577 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
40579 int i, j, n;
40581 switch (mode)
40583 case V2SFmode:
40584 case V2SImode:
40585 if (mmx_ok)
40587 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
40588 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
40589 if (elt == 0)
40590 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
40591 else
40592 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
40593 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40594 return;
40596 break;
40598 case V2DImode:
40599 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
40600 if (use_vec_merge)
40601 break;
40603 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
40604 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
40605 if (elt == 0)
40606 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
40607 else
40608 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
40609 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40610 return;
40612 case V2DFmode:
40614 rtx op0, op1;
40616 /* For the two element vectors, we implement a VEC_CONCAT with
40617 the extraction of the other element. */
40619 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
40620 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
40622 if (elt == 0)
40623 op0 = val, op1 = tmp;
40624 else
40625 op0 = tmp, op1 = val;
40627 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
40628 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40630 return;
40632 case V4SFmode:
40633 use_vec_merge = TARGET_SSE4_1;
40634 if (use_vec_merge)
40635 break;
40637 switch (elt)
40639 case 0:
40640 use_vec_merge = true;
40641 break;
40643 case 1:
40644 /* tmp = target = A B C D */
40645 tmp = copy_to_reg (target);
40646 /* target = A A B B */
40647 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
40648 /* target = X A B B */
40649 ix86_expand_vector_set (false, target, val, 0);
40650 /* target = A X C D */
40651 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40652 const1_rtx, const0_rtx,
40653 GEN_INT (2+4), GEN_INT (3+4)));
40654 return;
40656 case 2:
40657 /* tmp = target = A B C D */
40658 tmp = copy_to_reg (target);
40659 /* tmp = X B C D */
40660 ix86_expand_vector_set (false, tmp, val, 0);
40661 /* target = A B X D */
40662 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40663 const0_rtx, const1_rtx,
40664 GEN_INT (0+4), GEN_INT (3+4)));
40665 return;
40667 case 3:
40668 /* tmp = target = A B C D */
40669 tmp = copy_to_reg (target);
40670 /* tmp = X B C D */
40671 ix86_expand_vector_set (false, tmp, val, 0);
40672 /* target = A B C X */
40673 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40674 const0_rtx, const1_rtx,
40675 GEN_INT (2+4), GEN_INT (0+4)));
40676 return;
40678 default:
40679 gcc_unreachable ();
40681 break;
40683 case V4SImode:
40684 use_vec_merge = TARGET_SSE4_1;
40685 if (use_vec_merge)
40686 break;
40688 /* Element 0 handled by vec_merge below. */
40689 if (elt == 0)
40691 use_vec_merge = true;
40692 break;
40695 if (TARGET_SSE2)
40697 /* With SSE2, use integer shuffles to swap element 0 and ELT,
40698 store into element 0, then shuffle them back. */
40700 rtx order[4];
40702 order[0] = GEN_INT (elt);
40703 order[1] = const1_rtx;
40704 order[2] = const2_rtx;
40705 order[3] = GEN_INT (3);
40706 order[elt] = const0_rtx;
40708 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
40709 order[1], order[2], order[3]));
40711 ix86_expand_vector_set (false, target, val, 0);
40713 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
40714 order[1], order[2], order[3]));
40716 else
40718 /* For SSE1, we have to reuse the V4SF code. */
40719 rtx t = gen_reg_rtx (V4SFmode);
40720 ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
40721 emit_move_insn (target, gen_lowpart (mode, t));
40723 return;
40725 case V8HImode:
40726 use_vec_merge = TARGET_SSE2;
40727 break;
40728 case V4HImode:
40729 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
40730 break;
40732 case V16QImode:
40733 use_vec_merge = TARGET_SSE4_1;
40734 break;
40736 case V8QImode:
40737 break;
40739 case V32QImode:
40740 half_mode = V16QImode;
40741 j = 0;
40742 n = 16;
40743 goto half;
40745 case V16HImode:
40746 half_mode = V8HImode;
40747 j = 1;
40748 n = 8;
40749 goto half;
40751 case V8SImode:
40752 half_mode = V4SImode;
40753 j = 2;
40754 n = 4;
40755 goto half;
40757 case V4DImode:
40758 half_mode = V2DImode;
40759 j = 3;
40760 n = 2;
40761 goto half;
40763 case V8SFmode:
40764 half_mode = V4SFmode;
40765 j = 4;
40766 n = 4;
40767 goto half;
40769 case V4DFmode:
40770 half_mode = V2DFmode;
40771 j = 5;
40772 n = 2;
40773 goto half;
40775 half:
40776 /* Compute offset. */
40777 i = elt / n;
40778 elt %= n;
40780 gcc_assert (i <= 1);
40782 /* Extract the half. */
40783 tmp = gen_reg_rtx (half_mode);
40784 emit_insn (gen_extract[j][i] (tmp, target));
40786 /* Put val in tmp at elt. */
40787 ix86_expand_vector_set (false, tmp, val, elt);
40789 /* Put it back. */
40790 emit_insn (gen_insert[j][i] (target, target, tmp));
40791 return;
40793 default:
40794 break;
40797 if (use_vec_merge)
40799 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
40800 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
40801 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40803 else
40805 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
40807 emit_move_insn (mem, target);
40809 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
40810 emit_move_insn (tmp, val);
40812 emit_move_insn (target, mem);
40816 void
40817 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
40819 enum machine_mode mode = GET_MODE (vec);
40820 enum machine_mode inner_mode = GET_MODE_INNER (mode);
40821 bool use_vec_extr = false;
40822 rtx tmp;
40824 switch (mode)
40826 case V2SImode:
40827 case V2SFmode:
40828 if (!mmx_ok)
40829 break;
40830 /* FALLTHRU */
40832 case V2DFmode:
40833 case V2DImode:
40834 use_vec_extr = true;
40835 break;
40837 case V4SFmode:
40838 use_vec_extr = TARGET_SSE4_1;
40839 if (use_vec_extr)
40840 break;
40842 switch (elt)
40844 case 0:
40845 tmp = vec;
40846 break;
40848 case 1:
40849 case 3:
40850 tmp = gen_reg_rtx (mode);
40851 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
40852 GEN_INT (elt), GEN_INT (elt),
40853 GEN_INT (elt+4), GEN_INT (elt+4)));
40854 break;
40856 case 2:
40857 tmp = gen_reg_rtx (mode);
40858 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
40859 break;
40861 default:
40862 gcc_unreachable ();
40864 vec = tmp;
40865 use_vec_extr = true;
40866 elt = 0;
40867 break;
40869 case V4SImode:
40870 use_vec_extr = TARGET_SSE4_1;
40871 if (use_vec_extr)
40872 break;
40874 if (TARGET_SSE2)
40876 switch (elt)
40878 case 0:
40879 tmp = vec;
40880 break;
40882 case 1:
40883 case 3:
40884 tmp = gen_reg_rtx (mode);
40885 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
40886 GEN_INT (elt), GEN_INT (elt),
40887 GEN_INT (elt), GEN_INT (elt)));
40888 break;
40890 case 2:
40891 tmp = gen_reg_rtx (mode);
40892 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
40893 break;
40895 default:
40896 gcc_unreachable ();
40898 vec = tmp;
40899 use_vec_extr = true;
40900 elt = 0;
40902 else
40904 /* For SSE1, we have to reuse the V4SF code. */
40905 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
40906 gen_lowpart (V4SFmode, vec), elt);
40907 return;
40909 break;
40911 case V8HImode:
40912 use_vec_extr = TARGET_SSE2;
40913 break;
40914 case V4HImode:
40915 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
40916 break;
40918 case V16QImode:
40919 use_vec_extr = TARGET_SSE4_1;
40920 break;
40922 case V8SFmode:
40923 if (TARGET_AVX)
40925 tmp = gen_reg_rtx (V4SFmode);
40926 if (elt < 4)
40927 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
40928 else
40929 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
40930 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40931 return;
40933 break;
40935 case V4DFmode:
40936 if (TARGET_AVX)
40938 tmp = gen_reg_rtx (V2DFmode);
40939 if (elt < 2)
40940 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
40941 else
40942 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
40943 ix86_expand_vector_extract (false, target, tmp, elt & 1);
40944 return;
40946 break;
40948 case V32QImode:
40949 if (TARGET_AVX)
40951 tmp = gen_reg_rtx (V16QImode);
40952 if (elt < 16)
40953 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
40954 else
40955 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
40956 ix86_expand_vector_extract (false, target, tmp, elt & 15);
40957 return;
40959 break;
40961 case V16HImode:
40962 if (TARGET_AVX)
40964 tmp = gen_reg_rtx (V8HImode);
40965 if (elt < 8)
40966 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
40967 else
40968 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
40969 ix86_expand_vector_extract (false, target, tmp, elt & 7);
40970 return;
40972 break;
40974 case V8SImode:
40975 if (TARGET_AVX)
40977 tmp = gen_reg_rtx (V4SImode);
40978 if (elt < 4)
40979 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
40980 else
40981 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
40982 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40983 return;
40985 break;
40987 case V4DImode:
40988 if (TARGET_AVX)
40990 tmp = gen_reg_rtx (V2DImode);
40991 if (elt < 2)
40992 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
40993 else
40994 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
40995 ix86_expand_vector_extract (false, target, tmp, elt & 1);
40996 return;
40998 break;
41000 case V16SFmode:
41001 tmp = gen_reg_rtx (V8SFmode);
41002 if (elt < 8)
41003 emit_insn (gen_vec_extract_lo_v16sf (tmp, vec));
41004 else
41005 emit_insn (gen_vec_extract_hi_v16sf (tmp, vec));
41006 ix86_expand_vector_extract (false, target, tmp, elt & 7);
41007 return;
41009 case V8DFmode:
41010 tmp = gen_reg_rtx (V4DFmode);
41011 if (elt < 4)
41012 emit_insn (gen_vec_extract_lo_v8df (tmp, vec));
41013 else
41014 emit_insn (gen_vec_extract_hi_v8df (tmp, vec));
41015 ix86_expand_vector_extract (false, target, tmp, elt & 3);
41016 return;
41018 case V16SImode:
41019 tmp = gen_reg_rtx (V8SImode);
41020 if (elt < 8)
41021 emit_insn (gen_vec_extract_lo_v16si (tmp, vec));
41022 else
41023 emit_insn (gen_vec_extract_hi_v16si (tmp, vec));
41024 ix86_expand_vector_extract (false, target, tmp, elt & 7);
41025 return;
41027 case V8DImode:
41028 tmp = gen_reg_rtx (V4DImode);
41029 if (elt < 4)
41030 emit_insn (gen_vec_extract_lo_v8di (tmp, vec));
41031 else
41032 emit_insn (gen_vec_extract_hi_v8di (tmp, vec));
41033 ix86_expand_vector_extract (false, target, tmp, elt & 3);
41034 return;
41036 case V8QImode:
41037 /* ??? Could extract the appropriate HImode element and shift. */
41038 default:
41039 break;
41042 if (use_vec_extr)
41044 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
41045 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
41047 /* Let the rtl optimizers know about the zero extension performed. */
41048 if (inner_mode == QImode || inner_mode == HImode)
41050 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
41051 target = gen_lowpart (SImode, target);
41054 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
41056 else
41058 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
41060 emit_move_insn (mem, vec);
41062 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
41063 emit_move_insn (target, tmp);
41067 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
41068 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
41069 The upper bits of DEST are undefined, though they shouldn't cause
41070 exceptions (some bits from src or all zeros are ok). */
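/* For example, for a V4SImode SRC and i == 128 this emits a 64-bit
   logical right shift of the whole vector (via V1TImode, i.e. psrldq),
   moving elements 2 and 3 down into positions 0 and 1.  */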
41072 static void
41073 emit_reduc_half (rtx dest, rtx src, int i)
41075 rtx tem, d = dest;
41076 switch (GET_MODE (src))
41078 case V4SFmode:
41079 if (i == 128)
41080 tem = gen_sse_movhlps (dest, src, src);
41081 else
41082 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
41083 GEN_INT (1 + 4), GEN_INT (1 + 4));
41084 break;
41085 case V2DFmode:
41086 tem = gen_vec_interleave_highv2df (dest, src, src);
41087 break;
41088 case V16QImode:
41089 case V8HImode:
41090 case V4SImode:
41091 case V2DImode:
41092 d = gen_reg_rtx (V1TImode);
41093 tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
41094 GEN_INT (i / 2));
41095 break;
41096 case V8SFmode:
41097 if (i == 256)
41098 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
41099 else
41100 tem = gen_avx_shufps256 (dest, src, src,
41101 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
41102 break;
41103 case V4DFmode:
41104 if (i == 256)
41105 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
41106 else
41107 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
41108 break;
41109 case V32QImode:
41110 case V16HImode:
41111 case V8SImode:
41112 case V4DImode:
41113 if (i == 256)
41115 if (GET_MODE (dest) != V4DImode)
41116 d = gen_reg_rtx (V4DImode);
41117 tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src),
41118 gen_lowpart (V4DImode, src),
41119 const1_rtx);
41121 else
41123 d = gen_reg_rtx (V2TImode);
41124 tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src),
41125 GEN_INT (i / 2));
41127 break;
41128 case V16SImode:
41129 case V16SFmode:
41130 case V8DImode:
41131 case V8DFmode:
41132 if (i > 128)
41133 tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
41134 gen_lowpart (V16SImode, src),
41135 gen_lowpart (V16SImode, src),
41136 GEN_INT (0x4 + (i == 512 ? 4 : 0)),
41137 GEN_INT (0x5 + (i == 512 ? 4 : 0)),
41138 GEN_INT (0x6 + (i == 512 ? 4 : 0)),
41139 GEN_INT (0x7 + (i == 512 ? 4 : 0)),
41140 GEN_INT (0xC), GEN_INT (0xD),
41141 GEN_INT (0xE), GEN_INT (0xF),
41142 GEN_INT (0x10), GEN_INT (0x11),
41143 GEN_INT (0x12), GEN_INT (0x13),
41144 GEN_INT (0x14), GEN_INT (0x15),
41145 GEN_INT (0x16), GEN_INT (0x17));
41146 else
41147 tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
41148 gen_lowpart (V16SImode, src),
41149 GEN_INT (i == 128 ? 0x2 : 0x1),
41150 GEN_INT (0x3),
41151 GEN_INT (0x3),
41152 GEN_INT (0x3),
41153 GEN_INT (i == 128 ? 0x6 : 0x5),
41154 GEN_INT (0x7),
41155 GEN_INT (0x7),
41156 GEN_INT (0x7),
41157 GEN_INT (i == 128 ? 0xA : 0x9),
41158 GEN_INT (0xB),
41159 GEN_INT (0xB),
41160 GEN_INT (0xB),
41161 GEN_INT (i == 128 ? 0xE : 0xD),
41162 GEN_INT (0xF),
41163 GEN_INT (0xF),
41164 GEN_INT (0xF));
41165 break;
41166 default:
41167 gcc_unreachable ();
41169 emit_insn (tem);
41170 if (d != dest)
41171 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
41174 /* Expand a vector reduction. FN is the binary pattern to reduce;
41175 DEST is the destination; IN is the input vector. */
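/* For example, a V4SImode maximum reduction takes two rounds: the
   input is combined with itself shifted down by 64 bits, then the
   partial result with itself shifted down by 32 bits; the reduced
   value ends up in element 0 of DEST.  */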
41177 void
41178 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
41180 rtx half, dst, vec = in;
41181 enum machine_mode mode = GET_MODE (in);
41182 int i;
41184 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
41185 if (TARGET_SSE4_1
41186 && mode == V8HImode
41187 && fn == gen_uminv8hi3)
41189 emit_insn (gen_sse4_1_phminposuw (dest, in));
41190 return;
41193 for (i = GET_MODE_BITSIZE (mode);
41194 i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
41195 i >>= 1)
41197 half = gen_reg_rtx (mode);
41198 emit_reduc_half (half, vec, i);
41199 if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
41200 dst = dest;
41201 else
41202 dst = gen_reg_rtx (mode);
41203 emit_insn (fn (dst, half, vec));
41204 vec = dst;
41208 /* Target hook for scalar_mode_supported_p. */
41209 static bool
41210 ix86_scalar_mode_supported_p (enum machine_mode mode)
41212 if (DECIMAL_FLOAT_MODE_P (mode))
41213 return default_decimal_float_supported_p ();
41214 else if (mode == TFmode)
41215 return true;
41216 else
41217 return default_scalar_mode_supported_p (mode);
41220 /* Implements target hook vector_mode_supported_p. */
41221 static bool
41222 ix86_vector_mode_supported_p (enum machine_mode mode)
41224 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
41225 return true;
41226 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
41227 return true;
41228 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
41229 return true;
41230 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
41231 return true;
41232 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
41233 return true;
41234 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
41235 return true;
41236 return false;
41239 /* Target hook for c_mode_for_suffix. */
41240 static enum machine_mode
41241 ix86_c_mode_for_suffix (char suffix)
41243 if (suffix == 'q')
41244 return TFmode;
41245 if (suffix == 'w')
41246 return XFmode;
41248 return VOIDmode;
41251 /* Worker function for TARGET_MD_ASM_CLOBBERS.
41253 We do this in the new i386 backend to maintain source compatibility
41254 with the old cc0-based compiler. */
41256 static tree
41257 ix86_md_asm_clobbers (tree, tree, tree clobbers)
41259 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
41260 clobbers);
41261 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
41262 clobbers);
41263 return clobbers;
41266 /* Implements target vector targetm.asm.encode_section_info. */
41268 static void ATTRIBUTE_UNUSED
41269 ix86_encode_section_info (tree decl, rtx rtl, int first)
41271 default_encode_section_info (decl, rtl, first);
41273 if (TREE_CODE (decl) == VAR_DECL
41274 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
41275 && ix86_in_large_data_p (decl))
41276 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
41279 /* Worker function for REVERSE_CONDITION. */
41281 enum rtx_code
41282 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
41284 return (mode != CCFPmode && mode != CCFPUmode
41285 ? reverse_condition (code)
41286 : reverse_condition_maybe_unordered (code));
41289 /* Output code to perform an x87 FP register move, from OPERANDS[1]
41290 to OPERANDS[0]. */
41292 const char *
41293 output_387_reg_move (rtx insn, rtx *operands)
41295 if (REG_P (operands[0]))
41297 if (REG_P (operands[1])
41298 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
41300 if (REGNO (operands[0]) == FIRST_STACK_REG)
41301 return output_387_ffreep (operands, 0);
41302 return "fstp\t%y0";
41304 if (STACK_TOP_P (operands[0]))
41305 return "fld%Z1\t%y1";
41306 return "fst\t%y0";
41308 else if (MEM_P (operands[0]))
41310 gcc_assert (REG_P (operands[1]));
41311 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
41312 return "fstp%Z0\t%y0";
41313 else
41315 /* There is no non-popping store to memory for XFmode.
41316 So if we need one, follow the store with a load. */
41317 if (GET_MODE (operands[0]) == XFmode)
41318 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
41319 else
41320 return "fst%Z0\t%y0";
41323 else
41324 gcc_unreachable();
41327 /* Output code to perform a conditional jump to LABEL, if C2 flag in
41328 FP status register is set. */
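/* After fnstsw, C2 is bit 2 of the high byte of the status word; the
   SAHF path moves that byte into EFLAGS, where C2 becomes PF, which is
   what the UNORDERED test below checks.  The non-SAHF path tests the
   bit in the status word directly.  */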
41330 void
41331 ix86_emit_fp_unordered_jump (rtx label)
41333 rtx reg = gen_reg_rtx (HImode);
41334 rtx temp;
41336 emit_insn (gen_x86_fnstsw_1 (reg));
41338 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
41340 emit_insn (gen_x86_sahf_1 (reg));
41342 temp = gen_rtx_REG (CCmode, FLAGS_REG);
41343 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
41345 else
41347 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
41349 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
41350 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
41353 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
41354 gen_rtx_LABEL_REF (VOIDmode, label),
41355 pc_rtx);
41356 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
41358 emit_jump_insn (temp);
41359 predict_jump (REG_BR_PROB_BASE * 10 / 100);
41362 /* Output code to perform a log1p XFmode calculation. */
41364 void ix86_emit_i387_log1p (rtx op0, rtx op1)
41366 rtx_code_label *label1 = gen_label_rtx ();
41367 rtx_code_label *label2 = gen_label_rtx ();
41369 rtx tmp = gen_reg_rtx (XFmode);
41370 rtx tmp2 = gen_reg_rtx (XFmode);
41371 rtx test;
41373 emit_insn (gen_absxf2 (tmp, op1));
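  /* 0.29289321881345... is 1 - sqrt(2)/2, the largest magnitude for
     which the x87 fyl2xp1 instruction is specified to accept its
     argument; above that threshold we fall back to fyl2x on 1 + x.  */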
41374 test = gen_rtx_GE (VOIDmode, tmp,
41375 CONST_DOUBLE_FROM_REAL_VALUE (
41376 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
41377 XFmode));
41378 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
41380 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
41381 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
41382 emit_jump (label2);
41384 emit_label (label1);
41385 emit_move_insn (tmp, CONST1_RTX (XFmode));
41386 emit_insn (gen_addxf3 (tmp, op1, tmp));
41387 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
41388 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
41390 emit_label (label2);
41393 /* Emit code for round calculation. */
41394 void ix86_emit_i387_round (rtx op0, rtx op1)
41396 enum machine_mode inmode = GET_MODE (op1);
41397 enum machine_mode outmode = GET_MODE (op0);
41398 rtx e1, e2, res, tmp, tmp1, half;
41399 rtx scratch = gen_reg_rtx (HImode);
41400 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
41401 rtx_code_label *jump_label = gen_label_rtx ();
41402 rtx insn;
41403 rtx (*gen_abs) (rtx, rtx);
41404 rtx (*gen_neg) (rtx, rtx);
41406 switch (inmode)
41408 case SFmode:
41409 gen_abs = gen_abssf2;
41410 break;
41411 case DFmode:
41412 gen_abs = gen_absdf2;
41413 break;
41414 case XFmode:
41415 gen_abs = gen_absxf2;
41416 break;
41417 default:
41418 gcc_unreachable ();
41421 switch (outmode)
41423 case SFmode:
41424 gen_neg = gen_negsf2;
41425 break;
41426 case DFmode:
41427 gen_neg = gen_negdf2;
41428 break;
41429 case XFmode:
41430 gen_neg = gen_negxf2;
41431 break;
41432 case HImode:
41433 gen_neg = gen_neghi2;
41434 break;
41435 case SImode:
41436 gen_neg = gen_negsi2;
41437 break;
41438 case DImode:
41439 gen_neg = gen_negdi2;
41440 break;
41441 default:
41442 gcc_unreachable ();
41445 e1 = gen_reg_rtx (inmode);
41446 e2 = gen_reg_rtx (inmode);
41447 res = gen_reg_rtx (outmode);
41449 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);
41451 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
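  /* E.g. round (-2.5) = -floor (2.5 + 0.5) = -3; halfway cases are
     rounded away from zero.  */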
41453 /* scratch = fxam(op1) */
41454 emit_insn (gen_rtx_SET (VOIDmode, scratch,
41455 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
41456 UNSPEC_FXAM)));
41457 /* e1 = fabs(op1) */
41458 emit_insn (gen_abs (e1, op1));
41460 /* e2 = e1 + 0.5 */
41461 half = force_reg (inmode, half);
41462 emit_insn (gen_rtx_SET (VOIDmode, e2,
41463 gen_rtx_PLUS (inmode, e1, half)));
41465 /* res = floor(e2) */
41466 if (inmode != XFmode)
41468 tmp1 = gen_reg_rtx (XFmode);
41470 emit_insn (gen_rtx_SET (VOIDmode, tmp1,
41471 gen_rtx_FLOAT_EXTEND (XFmode, e2)));
41473 else
41474 tmp1 = e2;
41476 switch (outmode)
41478 case SFmode:
41479 case DFmode:
41481 rtx tmp0 = gen_reg_rtx (XFmode);
41483 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
41485 emit_insn (gen_rtx_SET (VOIDmode, res,
41486 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
41487 UNSPEC_TRUNC_NOOP)));
41489 break;
41490 case XFmode:
41491 emit_insn (gen_frndintxf2_floor (res, tmp1));
41492 break;
41493 case HImode:
41494 emit_insn (gen_lfloorxfhi2 (res, tmp1));
41495 break;
41496 case SImode:
41497 emit_insn (gen_lfloorxfsi2 (res, tmp1));
41498 break;
41499 case DImode:
41500 emit_insn (gen_lfloorxfdi2 (res, tmp1));
41501 break;
41502 default:
41503 gcc_unreachable ();
41506 /* flags = signbit(a) */
41507 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
41509 /* if (flags) then res = -res */
41510 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
41511 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
41512 gen_rtx_LABEL_REF (VOIDmode, jump_label),
41513 pc_rtx);
41514 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
41515 predict_jump (REG_BR_PROB_BASE * 50 / 100);
41516 JUMP_LABEL (insn) = jump_label;
41518 emit_insn (gen_neg (res, res));
41520 emit_label (jump_label);
41521 LABEL_NUSES (jump_label) = 1;
41523 emit_move_insn (op0, res);
41526 /* Output code to perform a Newton-Raphson approximation of a single precision
41527 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
41529 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
41531 rtx x0, x1, e0, e1;
41533 x0 = gen_reg_rtx (mode);
41534 e0 = gen_reg_rtx (mode);
41535 e1 = gen_reg_rtx (mode);
41536 x1 = gen_reg_rtx (mode);
41538 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
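  /* This is one Newton-Raphson refinement step x1 = x0 * (2 - b * x0)
     applied to the estimate x0 = rcp(b), rewritten as
     (x0 + x0) - (b * x0 * x0) to match the temporaries below.  */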
41540 b = force_reg (mode, b);
41542 /* x0 = rcp(b) estimate */
41543 if (mode == V16SFmode || mode == V8DFmode)
41544 emit_insn (gen_rtx_SET (VOIDmode, x0,
41545 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
41546 UNSPEC_RCP14)));
41547 else
41548 emit_insn (gen_rtx_SET (VOIDmode, x0,
41549 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
41550 UNSPEC_RCP)));
41552 /* e0 = x0 * b */
41553 emit_insn (gen_rtx_SET (VOIDmode, e0,
41554 gen_rtx_MULT (mode, x0, b)));
41556 /* e0 = x0 * e0 */
41557 emit_insn (gen_rtx_SET (VOIDmode, e0,
41558 gen_rtx_MULT (mode, x0, e0)));
41560 /* e1 = x0 + x0 */
41561 emit_insn (gen_rtx_SET (VOIDmode, e1,
41562 gen_rtx_PLUS (mode, x0, x0)));
41564 /* x1 = e1 - e0 */
41565 emit_insn (gen_rtx_SET (VOIDmode, x1,
41566 gen_rtx_MINUS (mode, e1, e0)));
41568 /* res = a * x1 */
41569 emit_insn (gen_rtx_SET (VOIDmode, res,
41570 gen_rtx_MULT (mode, a, x1)));
41573 /* Output code to perform a Newton-Raphson approximation of a
41574 single precision floating point [reciprocal] square root. */
41576 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
41577 bool recip)
41579 rtx x0, e0, e1, e2, e3, mthree, mhalf;
41580 REAL_VALUE_TYPE r;
41581 int unspec;
41583 x0 = gen_reg_rtx (mode);
41584 e0 = gen_reg_rtx (mode);
41585 e1 = gen_reg_rtx (mode);
41586 e2 = gen_reg_rtx (mode);
41587 e3 = gen_reg_rtx (mode);
41589 real_from_integer (&r, VOIDmode, -3, SIGNED);
41590 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
41592 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
41593 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
41594 unspec = UNSPEC_RSQRT;
41596 if (VECTOR_MODE_P (mode))
41598 mthree = ix86_build_const_vector (mode, true, mthree);
41599 mhalf = ix86_build_const_vector (mode, true, mhalf);
41600 /* There is no 512-bit rsqrt. There is however rsqrt14. */
41601 if (GET_MODE_SIZE (mode) == 64)
41602 unspec = UNSPEC_RSQRT14;
41605 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
41606 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
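  /* Both are the Newton-Raphson step x1 = 0.5 * x0 * (3 - a * x0 * x0)
     for 1/sqrt(a) with x0 = rsqrtss(a), factored as
     -0.5 * x0 * (a * x0 * x0 - 3); multiplying once more by a gives
     the sqrt form.  */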
41608 a = force_reg (mode, a);
41610 /* x0 = rsqrt(a) estimate */
41611 emit_insn (gen_rtx_SET (VOIDmode, x0,
41612 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
41613 unspec)));
41615 /* If (a == 0.0), filter out the infinite rsqrt estimate to prevent NaN for sqrt (0.0). */
41616 if (!recip)
41618 rtx zero, mask;
41620 zero = gen_reg_rtx (mode);
41621 mask = gen_reg_rtx (mode);
41623 zero = force_reg (mode, CONST0_RTX(mode));
41625 /* Handle masked compare. */
41626 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
41628 mask = gen_reg_rtx (HImode);
41629 /* Imm value 0x4 corresponds to not-equal comparison. */
41630 emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4)));
41631 emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask));
41633 else
41635 emit_insn (gen_rtx_SET (VOIDmode, mask,
41636 gen_rtx_NE (mode, zero, a)));
41638 emit_insn (gen_rtx_SET (VOIDmode, x0,
41639 gen_rtx_AND (mode, x0, mask)));
41643 /* e0 = x0 * a */
41644 emit_insn (gen_rtx_SET (VOIDmode, e0,
41645 gen_rtx_MULT (mode, x0, a)));
41646 /* e1 = e0 * x0 */
41647 emit_insn (gen_rtx_SET (VOIDmode, e1,
41648 gen_rtx_MULT (mode, e0, x0)));
41650 /* e2 = e1 - 3. */
41651 mthree = force_reg (mode, mthree);
41652 emit_insn (gen_rtx_SET (VOIDmode, e2,
41653 gen_rtx_PLUS (mode, e1, mthree)));
41655 mhalf = force_reg (mode, mhalf);
41656 if (recip)
41657 /* e3 = -.5 * x0 */
41658 emit_insn (gen_rtx_SET (VOIDmode, e3,
41659 gen_rtx_MULT (mode, x0, mhalf)));
41660 else
41661 /* e3 = -.5 * e0 */
41662 emit_insn (gen_rtx_SET (VOIDmode, e3,
41663 gen_rtx_MULT (mode, e0, mhalf)));
41664 /* ret = e2 * e3 */
41665 emit_insn (gen_rtx_SET (VOIDmode, res,
41666 gen_rtx_MULT (mode, e2, e3)));
41669 #ifdef TARGET_SOLARIS
41670 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
41672 static void
41673 i386_solaris_elf_named_section (const char *name, unsigned int flags,
41674 tree decl)
41676 /* With Binutils 2.15, the "@unwind" marker must be specified on
41677 every occurrence of the ".eh_frame" section, not just the first
41678 one. */
41679 if (TARGET_64BIT
41680 && strcmp (name, ".eh_frame") == 0)
41682 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
41683 flags & SECTION_WRITE ? "aw" : "a");
41684 return;
41687 #ifndef USE_GAS
41688 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
41690 solaris_elf_asm_comdat_section (name, flags, decl);
41691 return;
41693 #endif
41695 default_elf_asm_named_section (name, flags, decl);
41697 #endif /* TARGET_SOLARIS */
41699 /* Return the mangling of TYPE if it is an extended fundamental type. */
41701 static const char *
41702 ix86_mangle_type (const_tree type)
41704 type = TYPE_MAIN_VARIANT (type);
41706 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
41707 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
41708 return NULL;
41710 switch (TYPE_MODE (type))
41712 case TFmode:
41713 /* __float128 is "g". */
41714 return "g";
41715 case XFmode:
41716 /* "long double" or __float80 is "e". */
41717 return "e";
41718 default:
41719 return NULL;
41723 /* For 32-bit code we can save PIC register setup by using
41724 __stack_chk_fail_local hidden function instead of calling
41725 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
41726 register, so it is better to call __stack_chk_fail directly. */
41728 static tree ATTRIBUTE_UNUSED
41729 ix86_stack_protect_fail (void)
41731 return TARGET_64BIT
41732 ? default_external_stack_protect_fail ()
41733 : default_hidden_stack_protect_fail ();
41736 /* Select a format to encode pointers in exception handling data. CODE
41737 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
41738 true if the symbol may be affected by dynamic relocations.
41740 ??? All x86 object file formats are capable of representing this.
41741 After all, the relocation needed is the same as for the call insn.
41742 Whether or not a particular assembler allows us to enter such, I
41743 guess we'll have to see. */
41744 int
41745 asm_preferred_eh_data_format (int code, int global)
41747 if (flag_pic)
41749 int type = DW_EH_PE_sdata8;
41750 if (!TARGET_64BIT
41751 || ix86_cmodel == CM_SMALL_PIC
41752 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
41753 type = DW_EH_PE_sdata4;
41754 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
41756 if (ix86_cmodel == CM_SMALL
41757 || (ix86_cmodel == CM_MEDIUM && code))
41758 return DW_EH_PE_udata4;
41759 return DW_EH_PE_absptr;
41762 /* Expand copysign from SIGN to the positive value ABS_VALUE
41763 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
41764 the sign-bit. */
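/* In either case SGN ends up holding just the sign bit of SIGN (a MASK
   passed in is the fabs mask with the sign bit clear, so its complement
   is used), and the result is ABS_VALUE with that bit OR-ed in.  */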
41765 static void
41766 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
41768 enum machine_mode mode = GET_MODE (sign);
41769 rtx sgn = gen_reg_rtx (mode);
41770 if (mask == NULL_RTX)
41772 enum machine_mode vmode;
41774 if (mode == SFmode)
41775 vmode = V4SFmode;
41776 else if (mode == DFmode)
41777 vmode = V2DFmode;
41778 else
41779 vmode = mode;
41781 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
41782 if (!VECTOR_MODE_P (mode))
41784 /* We need to generate a scalar mode mask in this case. */
41785 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
41786 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
41787 mask = gen_reg_rtx (mode);
41788 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
41791 else
41792 mask = gen_rtx_NOT (mode, mask);
41793 emit_insn (gen_rtx_SET (VOIDmode, sgn,
41794 gen_rtx_AND (mode, mask, sign)));
41795 emit_insn (gen_rtx_SET (VOIDmode, result,
41796 gen_rtx_IOR (mode, abs_value, sgn)));
41799 /* Expand fabs (OP0) and return a new rtx that holds the result. The
41800 mask for masking out the sign-bit is stored in *SMASK, if that is
41801 non-null. */
41802 static rtx
41803 ix86_expand_sse_fabs (rtx op0, rtx *smask)
41805 enum machine_mode vmode, mode = GET_MODE (op0);
41806 rtx xa, mask;
41808 xa = gen_reg_rtx (mode);
41809 if (mode == SFmode)
41810 vmode = V4SFmode;
41811 else if (mode == DFmode)
41812 vmode = V2DFmode;
41813 else
41814 vmode = mode;
41815 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
41816 if (!VECTOR_MODE_P (mode))
41818 /* We need to generate a scalar mode mask in this case. */
41819 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
41820 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
41821 mask = gen_reg_rtx (mode);
41822 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
41824 emit_insn (gen_rtx_SET (VOIDmode, xa,
41825 gen_rtx_AND (mode, op0, mask)));
41827 if (smask)
41828 *smask = mask;
41830 return xa;
41833 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
41834 swapping the operands if SWAP_OPERANDS is true. The expanded
41835 code is a forward jump to a newly created label in case the
41836 comparison is true. The generated label rtx is returned. */
41837 static rtx_code_label *
41838 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
41839 bool swap_operands)
41841 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
41842 rtx_code_label *label;
41843 rtx tmp;
41845 if (swap_operands)
41847 tmp = op0;
41848 op0 = op1;
41849 op1 = tmp;
41852 label = gen_label_rtx ();
41853 tmp = gen_rtx_REG (fpcmp_mode, FLAGS_REG);
41854 emit_insn (gen_rtx_SET (VOIDmode, tmp,
41855 gen_rtx_COMPARE (fpcmp_mode, op0, op1)));
41856 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
41857 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
41858 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
41859 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
41860 JUMP_LABEL (tmp) = label;
41862 return label;
41865 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
41866 using comparison code CODE. Operands are swapped for the comparison if
41867 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
41868 static rtx
41869 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
41870 bool swap_operands)
41872 rtx (*insn)(rtx, rtx, rtx, rtx);
41873 enum machine_mode mode = GET_MODE (op0);
41874 rtx mask = gen_reg_rtx (mode);
41876 if (swap_operands)
41878 rtx tmp = op0;
41879 op0 = op1;
41880 op1 = tmp;
41883 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
41885 emit_insn (insn (mask, op0, op1,
41886 gen_rtx_fmt_ee (code, mode, op0, op1)));
41887 return mask;
41890 /* Generate and return a rtx of mode MODE for 2**n where n is the number
41891 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
41892 static rtx
41893 ix86_gen_TWO52 (enum machine_mode mode)
41895 REAL_VALUE_TYPE TWO52r;
41896 rtx TWO52;
41898 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
41899 TWO52 = const_double_from_real_value (TWO52r, mode);
41900 TWO52 = force_reg (mode, TWO52);
41902 return TWO52;
41905 /* Expand SSE sequence for computing lround from OP1 storing
41906 into OP0. */
41907 void
41908 ix86_expand_lround (rtx op0, rtx op1)
41910 /* C code for the stuff we're doing below:
41911 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
41912 return (long)tmp;
41914 enum machine_mode mode = GET_MODE (op1);
41915 const struct real_format *fmt;
41916 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
41917 rtx adj;
41919 /* load nextafter (0.5, 0.0) */
41920 fmt = REAL_MODE_FORMAT (mode);
41921 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
41922 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
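  /* The value just below 0.5 is used so that an argument slightly under
     a half-way point is not rounded across it by the addition: for the
     largest double below 0.5, adding 0.5 would round the sum up to 1.0,
     while adding pred (0.5) keeps it below 1.0 so it truncates to 0.  */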
41924 /* adj = copysign (0.5, op1) */
41925 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
41926 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
41928 /* adj = op1 + adj */
41929 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
41931 /* op0 = (imode)adj */
41932 expand_fix (op0, adj, 0);
41935 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1 storing
41936 into OPERAND0. */
41937 void
41938 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
41940 /* C code for the stuff we're doing below (for do_floor):
41941 xi = (long)op1;
41942 xi -= (double)xi > op1 ? 1 : 0;
41943 return xi;
41945 enum machine_mode fmode = GET_MODE (op1);
41946 enum machine_mode imode = GET_MODE (op0);
41947 rtx ireg, freg, tmp;
41948 rtx_code_label *label;
41950 /* reg = (long)op1 */
41951 ireg = gen_reg_rtx (imode);
41952 expand_fix (ireg, op1, 0);
41954 /* freg = (double)reg */
41955 freg = gen_reg_rtx (fmode);
41956 expand_float (freg, ireg, 0);
41958 /* ireg = (freg > op1) ? ireg - 1 : ireg */
41959 label = ix86_expand_sse_compare_and_jump (UNLE,
41960 freg, op1, !do_floor);
41961 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
41962 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
41963 emit_move_insn (ireg, tmp);
41965 emit_label (label);
41966 LABEL_NUSES (label) = 1;
41968 emit_move_insn (op0, ireg);
41971 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
41972 result in OPERAND0. */
41973 void
41974 ix86_expand_rint (rtx operand0, rtx operand1)
41976 /* C code for the stuff we're doing below:
41977 xa = fabs (operand1);
41978 if (!isless (xa, 2**52))
41979 return operand1;
41980 xa = xa + 2**52 - 2**52;
41981 return copysign (xa, operand1);
41983 enum machine_mode mode = GET_MODE (operand0);
41984 rtx res, xa, TWO52, mask;
41985 rtx_code_label *label;
41987 res = gen_reg_rtx (mode);
41988 emit_move_insn (res, operand1);
41990 /* xa = abs (operand1) */
41991 xa = ix86_expand_sse_fabs (res, &mask);
41993 /* if (!isless (xa, TWO52)) goto label; */
41994 TWO52 = ix86_gen_TWO52 (mode);
41995 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
41997 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
41998 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
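  /* Adding and then subtracting 2**52 rounds xa to an integer: once
     2**52 is added, a double has no bits below the units place, so the
     addition discards the fraction (in the current rounding mode) and
     the subtraction recovers the rounded value.  */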
42000 ix86_sse_copysign_to_positive (res, xa, res, mask);
42002 emit_label (label);
42003 LABEL_NUSES (label) = 1;
42005 emit_move_insn (operand0, res);
42008 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
42009 into OPERAND0. */
42010 void
42011 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
42013 /* C code for the stuff we expand below.
42014 double xa = fabs (x), x2;
42015 if (!isless (xa, TWO52))
42016 return x;
42017 xa = xa + TWO52 - TWO52;
42018 x2 = copysign (xa, x);
42019 Compensate. Floor:
42020 if (x2 > x)
42021 x2 -= 1;
42022 Compensate. Ceil:
42023 if (x2 < x)
42024 x2 -= -1;
42025 return x2;
42027 enum machine_mode mode = GET_MODE (operand0);
42028 rtx xa, TWO52, tmp, one, res, mask;
42029 rtx_code_label *label;
42031 TWO52 = ix86_gen_TWO52 (mode);
42033 /* Temporary for holding the result, initialized to the input
42034 operand to ease control flow. */
42035 res = gen_reg_rtx (mode);
42036 emit_move_insn (res, operand1);
42038 /* xa = abs (operand1) */
42039 xa = ix86_expand_sse_fabs (res, &mask);
42041 /* if (!isless (xa, TWO52)) goto label; */
42042 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42044 /* xa = xa + TWO52 - TWO52; */
42045 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
42046 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
42048 /* xa = copysign (xa, operand1) */
42049 ix86_sse_copysign_to_positive (xa, xa, res, mask);
42051 /* generate 1.0 or -1.0 */
42052 one = force_reg (mode,
42053 const_double_from_real_value (do_floor
42054 ? dconst1 : dconstm1, mode));
42056 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
42057 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
42058 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42059 gen_rtx_AND (mode, one, tmp)));
42060 /* We always need to subtract here to preserve signed zero. */
42061 tmp = expand_simple_binop (mode, MINUS,
42062 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42063 emit_move_insn (res, tmp);
42065 emit_label (label);
42066 LABEL_NUSES (label) = 1;
42068 emit_move_insn (operand0, res);
42071 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
42072 into OPERAND0. */
42073 void
42074 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
42076 /* C code for the stuff we expand below.
42077 double xa = fabs (x), x2;
42078 if (!isless (xa, TWO52))
42079 return x;
42080 x2 = (double)(long)x;
42081 Compensate. Floor:
42082 if (x2 > x)
42083 x2 -= 1;
42084 Compensate. Ceil:
42085 if (x2 < x)
42086 x2 += 1;
42087 if (HONOR_SIGNED_ZEROS (mode))
42088 return copysign (x2, x);
42089 return x2;
42091 enum machine_mode mode = GET_MODE (operand0);
42092 rtx xa, xi, TWO52, tmp, one, res, mask;
42093 rtx_code_label *label;
42095 TWO52 = ix86_gen_TWO52 (mode);
42097 /* Temporary for holding the result, initialized to the input
42098 operand to ease control flow. */
42099 res = gen_reg_rtx (mode);
42100 emit_move_insn (res, operand1);
42102 /* xa = abs (operand1) */
42103 xa = ix86_expand_sse_fabs (res, &mask);
42105 /* if (!isless (xa, TWO52)) goto label; */
42106 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42108 /* xa = (double)(long)x */
42109 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
42110 expand_fix (xi, res, 0);
42111 expand_float (xa, xi, 0);
42113 /* generate 1.0 */
42114 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
42116 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
42117 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
42118 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42119 gen_rtx_AND (mode, one, tmp)));
42120 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
42121 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42122 emit_move_insn (res, tmp);
42124 if (HONOR_SIGNED_ZEROS (mode))
42125 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
42127 emit_label (label);
42128 LABEL_NUSES (label) = 1;
42130 emit_move_insn (operand0, res);
42133 /* Expand SSE sequence for computing round from OPERAND1 storing
42134 into OPERAND0. The sequence works without relying on DImode truncation
42135 via cvttsd2siq, which is only available on 64-bit targets. */
42136 void
42137 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
42139 /* C code for the stuff we expand below.
42140 double xa = fabs (x), xa2, x2;
42141 if (!isless (xa, TWO52))
42142 return x;
42143 Using the absolute value and copying back sign makes
42144 -0.0 -> -0.0 correct.
42145 xa2 = xa + TWO52 - TWO52;
42146 Compensate.
42147 dxa = xa2 - xa;
42148 if (dxa <= -0.5)
42149 xa2 += 1;
42150 else if (dxa > 0.5)
42151 xa2 -= 1;
42152 x2 = copysign (xa2, x);
42153 return x2;
42155 enum machine_mode mode = GET_MODE (operand0);
42156 rtx xa, xa2, dxa, TWO52, tmp, half, mhalf, one, res, mask;
42157 rtx_code_label *label;
42159 TWO52 = ix86_gen_TWO52 (mode);
42161 /* Temporary for holding the result, initialized to the input
42162 operand to ease control flow. */
42163 res = gen_reg_rtx (mode);
42164 emit_move_insn (res, operand1);
42166 /* xa = abs (operand1) */
42167 xa = ix86_expand_sse_fabs (res, &mask);
42169 /* if (!isless (xa, TWO52)) goto label; */
42170 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42172 /* xa2 = xa + TWO52 - TWO52; */
42173 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
42174 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
42176 /* dxa = xa2 - xa; */
42177 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
42179 /* generate 0.5, 1.0 and -0.5 */
42180 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
42181 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
42182 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
42183 0, OPTAB_DIRECT);
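/* Only 0.5 is materialized as a constant; 1.0 is computed as half + half
   and -0.5 as half - 1.0, avoiding two more constant loads.  */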
42185 /* Compensate. */
42186 tmp = gen_reg_rtx (mode);
42187 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
42188 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
42189 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42190 gen_rtx_AND (mode, one, tmp)));
42191 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42192 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
42193 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
42194 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42195 gen_rtx_AND (mode, one, tmp)));
42196 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42198 /* res = copysign (xa2, operand1) */
42199 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
42201 emit_label (label);
42202 LABEL_NUSES (label) = 1;
42204 emit_move_insn (operand0, res);
42207 /* Expand SSE sequence for computing trunc from OPERAND1 storing
42208 into OPERAND0. */
42209 void
42210 ix86_expand_trunc (rtx operand0, rtx operand1)
42212 /* C code for SSE variant we expand below.
42213 double xa = fabs (x), x2;
42214 if (!isless (xa, TWO52))
42215 return x;
42216 x2 = (double)(long)x;
42217 if (HONOR_SIGNED_ZEROS (mode))
42218 return copysign (x2, x);
42219 return x2;
42221 enum machine_mode mode = GET_MODE (operand0);
42222 rtx xa, xi, TWO52, res, mask;
42223 rtx_code_label *label;
42225 TWO52 = ix86_gen_TWO52 (mode);
42227 /* Temporary for holding the result, initialized to the input
42228 operand to ease control flow. */
42229 res = gen_reg_rtx (mode);
42230 emit_move_insn (res, operand1);
42232 /* xa = abs (operand1) */
42233 xa = ix86_expand_sse_fabs (res, &mask);
42235 /* if (!isless (xa, TWO52)) goto label; */
42236 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42238 /* x = (double)(long)x */
42239 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
42240 expand_fix (xi, res, 0);
42241 expand_float (res, xi, 0);
42243 if (HONOR_SIGNED_ZEROS (mode))
42244 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
42246 emit_label (label);
42247 LABEL_NUSES (label) = 1;
42249 emit_move_insn (operand0, res);
42252 /* Expand SSE sequence for computing trunc from OPERAND1 storing
42253 into OPERAND0, without relying on 64-bit-only DImode truncation. */
42254 void
42255 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
42257 enum machine_mode mode = GET_MODE (operand0);
42258 rtx xa, mask, TWO52, one, res, smask, tmp;
42259 rtx_code_label *label;
42261 /* C code for SSE variant we expand below.
42262 double xa = fabs (x), x2;
42263 if (!isless (xa, TWO52))
42264 return x;
42265 xa2 = xa + TWO52 - TWO52;
42266 Compensate:
42267 if (xa2 > xa)
42268 xa2 -= 1.0;
42269 x2 = copysign (xa2, x);
42270 return x2;
42273 TWO52 = ix86_gen_TWO52 (mode);
42275 /* Temporary for holding the result, initialized to the input
42276 operand to ease control flow. */
42277 res = gen_reg_rtx (mode);
42278 emit_move_insn (res, operand1);
42280 /* xa = abs (operand1) */
42281 xa = ix86_expand_sse_fabs (res, &smask);
42283 /* if (!isless (xa, TWO52)) goto label; */
42284 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42286 /* res = xa + TWO52 - TWO52; */
42287 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
42288 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
42289 emit_move_insn (res, tmp);
42291 /* generate 1.0 */
42292 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
42294 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
42295 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
42296 emit_insn (gen_rtx_SET (VOIDmode, mask,
42297 gen_rtx_AND (mode, mask, one)));
42298 tmp = expand_simple_binop (mode, MINUS,
42299 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
42300 emit_move_insn (res, tmp);
42302 /* res = copysign (res, operand1) */
42303 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
42305 emit_label (label);
42306 LABEL_NUSES (label) = 1;
42308 emit_move_insn (operand0, res);
42311 /* Expand SSE sequence for computing round from OPERAND1 storing
42312 into OPERAND0. */
42313 void
42314 ix86_expand_round (rtx operand0, rtx operand1)
42316 /* C code for the stuff we're doing below:
42317 double xa = fabs (x);
42318 if (!isless (xa, TWO52))
42319 return x;
42320 xa = (double)(long)(xa + nextafter (0.5, 0.0));
42321 return copysign (xa, x);
42323 enum machine_mode mode = GET_MODE (operand0);
42324 rtx res, TWO52, xa, xi, half, mask;
42325 rtx_code_label *label;
42326 const struct real_format *fmt;
42327 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
42329 /* Temporary for holding the result, initialized to the input
42330 operand to ease control flow. */
42331 res = gen_reg_rtx (mode);
42332 emit_move_insn (res, operand1);
42334 TWO52 = ix86_gen_TWO52 (mode);
42335 xa = ix86_expand_sse_fabs (res, &mask);
42336 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42338 /* load nextafter (0.5, 0.0) */
42339 fmt = REAL_MODE_FORMAT (mode);
42340 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
42341 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
42343 /* xa = xa + 0.5 */
42344 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
42345 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
42347 /* xa = (double)(int64_t)xa */
42348 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
42349 expand_fix (xi, xa, 0);
42350 expand_float (xa, xi, 0);
42352 /* res = copysign (xa, operand1) */
42353 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
42355 emit_label (label);
42356 LABEL_NUSES (label) = 1;
42358 emit_move_insn (operand0, res);
42361 /* Expand SSE sequence for computing round
42362 from OP1 storing into OP0 using sse4 round insn. */
42363 void
42364 ix86_expand_round_sse4 (rtx op0, rtx op1)
42366 enum machine_mode mode = GET_MODE (op0);
42367 rtx e1, e2, res, half;
42368 const struct real_format *fmt;
42369 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
42370 rtx (*gen_copysign) (rtx, rtx, rtx);
42371 rtx (*gen_round) (rtx, rtx, rtx);
42373 switch (mode)
42375 case SFmode:
42376 gen_copysign = gen_copysignsf3;
42377 gen_round = gen_sse4_1_roundsf2;
42378 break;
42379 case DFmode:
42380 gen_copysign = gen_copysigndf3;
42381 gen_round = gen_sse4_1_rounddf2;
42382 break;
42383 default:
42384 gcc_unreachable ();
42387 /* round (a) = trunc (a + copysign (0.5, a)) */
42389 /* load nextafter (0.5, 0.0) */
42390 fmt = REAL_MODE_FORMAT (mode);
42391 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
42392 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
42393 half = const_double_from_real_value (pred_half, mode);
42395 /* e1 = copysign (0.5, op1) */
42396 e1 = gen_reg_rtx (mode);
42397 emit_insn (gen_copysign (e1, half, op1));
42399 /* e2 = op1 + e1 */
42400 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
42402 /* res = trunc (e2) */
42403 res = gen_reg_rtx (mode);
42404 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
42406 emit_move_insn (op0, res);
42410 /* Table of valid machine attributes. */
42411 static const struct attribute_spec ix86_attribute_table[] =
42413 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
42414 affects_type_identity } */
42415 /* Stdcall attribute says callee is responsible for popping arguments
42416 if they are not variable. */
42417 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42418 true },
42419 /* Fastcall attribute says callee is responsible for popping arguments
42420 if they are not variable. */
42421 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42422 true },
42423 /* Thiscall attribute says callee is responsible for popping arguments
42424 if they are not variable. */
42425 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42426 true },
42427 /* Cdecl attribute says the callee is a normal C declaration */
42428 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42429 true },
42430 /* Regparm attribute specifies how many integer arguments are to be
42431 passed in registers. */
42432 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
42433 true },
42434 /* Sseregparm attribute says we are using x86_64 calling conventions
42435 for FP arguments. */
42436 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42437 true },
42438 /* The transactional memory builtins are implicitly regparm or fastcall
42439 depending on the ABI. Override the generic do-nothing attribute that
42440 these builtins were declared with. */
42441 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
42442 true },
42443 /* force_align_arg_pointer says this function realigns the stack at entry. */
42444 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
42445 false, true, true, ix86_handle_cconv_attribute, false },
42446 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
42447 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
42448 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
42449 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
42450 false },
42451 #endif
42452 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
42453 false },
42454 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
42455 false },
42456 #ifdef SUBTARGET_ATTRIBUTE_TABLE
42457 SUBTARGET_ATTRIBUTE_TABLE,
42458 #endif
42459 /* ms_abi and sysv_abi calling convention function attributes. */
42460 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
42461 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
42462 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
42463 false },
42464 { "callee_pop_aggregate_return", 1, 1, false, true, true,
42465 ix86_handle_callee_pop_aggregate_return, true },
42466 /* End element. */
42467 { NULL, 0, 0, false, false, false, NULL, false }
42470 /* Implement targetm.vectorize.builtin_vectorization_cost. */
42471 static int
42472 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
42473 tree vectype, int)
42475 unsigned elements;
42477 switch (type_of_cost)
42479 case scalar_stmt:
42480 return ix86_cost->scalar_stmt_cost;
42482 case scalar_load:
42483 return ix86_cost->scalar_load_cost;
42485 case scalar_store:
42486 return ix86_cost->scalar_store_cost;
42488 case vector_stmt:
42489 return ix86_cost->vec_stmt_cost;
42491 case vector_load:
42492 return ix86_cost->vec_align_load_cost;
42494 case vector_store:
42495 return ix86_cost->vec_store_cost;
42497 case vec_to_scalar:
42498 return ix86_cost->vec_to_scalar_cost;
42500 case scalar_to_vec:
42501 return ix86_cost->scalar_to_vec_cost;
42503 case unaligned_load:
42504 case unaligned_store:
42505 return ix86_cost->vec_unalign_load_cost;
42507 case cond_branch_taken:
42508 return ix86_cost->cond_taken_branch_cost;
42510 case cond_branch_not_taken:
42511 return ix86_cost->cond_not_taken_branch_cost;
42513 case vec_perm:
42514 case vec_promote_demote:
42515 return ix86_cost->vec_stmt_cost;
42517 case vec_construct:
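/* Building an N-element vector from scalar pieces is estimated as roughly
   one statement per pair of elements plus one.  */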
42518 elements = TYPE_VECTOR_SUBPARTS (vectype);
42519 return elements / 2 + 1;
42521 default:
42522 gcc_unreachable ();
42526 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
42527 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
42528 insn every time. */
42530 static GTY(()) rtx vselect_insn;
42532 /* Initialize vselect_insn. */
42534 static void
42535 init_vselect_insn (void)
42537 unsigned i;
42538 rtx x;
42540 x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
42541 for (i = 0; i < MAX_VECT_LEN; ++i)
42542 XVECEXP (x, 0, i) = const0_rtx;
42543 x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
42544 const0_rtx), x);
42545 x = gen_rtx_SET (VOIDmode, const0_rtx, x);
42546 start_sequence ();
42547 vselect_insn = emit_insn (x);
42548 end_sequence ();
42551 /* Construct (set target (vec_select op0 (parallel perm))) and
42552 return true if that's a valid instruction in the active ISA. */
42554 static bool
42555 expand_vselect (rtx target, rtx op0, const unsigned char *perm,
42556 unsigned nelt, bool testing_p)
42558 unsigned int i;
42559 rtx x, save_vconcat;
42560 int icode;
42562 if (vselect_insn == NULL_RTX)
42563 init_vselect_insn ();
42565 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
42566 PUT_NUM_ELEM (XVEC (x, 0), nelt);
42567 for (i = 0; i < nelt; ++i)
42568 XVECEXP (x, 0, i) = GEN_INT (perm[i]);
42569 save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
42570 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
42571 PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
42572 SET_DEST (PATTERN (vselect_insn)) = target;
42573 icode = recog_memoized (vselect_insn);
42575 if (icode >= 0 && !testing_p)
42576 emit_insn (copy_rtx (PATTERN (vselect_insn)));
42578 SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
42579 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
42580 INSN_CODE (vselect_insn) = -1;
42582 return icode >= 0;
42585 /* Similar, but generate a vec_concat from op0 and op1 as well. */
42587 static bool
42588 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
42589 const unsigned char *perm, unsigned nelt,
42590 bool testing_p)
42592 enum machine_mode v2mode;
42593 rtx x;
42594 bool ok;
42596 if (vselect_insn == NULL_RTX)
42597 init_vselect_insn ();
42599 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
42600 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
42601 PUT_MODE (x, v2mode);
42602 XEXP (x, 0) = op0;
42603 XEXP (x, 1) = op1;
42604 ok = expand_vselect (target, x, perm, nelt, testing_p);
42605 XEXP (x, 0) = const0_rtx;
42606 XEXP (x, 1) = const0_rtx;
42607 return ok;
42610 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
42611 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
42613 static bool
42614 expand_vec_perm_blend (struct expand_vec_perm_d *d)
42616 enum machine_mode vmode = d->vmode;
42617 unsigned i, mask, nelt = d->nelt;
42618 rtx target, op0, op1, x;
42619 rtx rperm[32], vperm;
42621 if (d->one_operand_p)
42622 return false;
42623 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
42625 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
42627 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
42629 else
42630 return false;
42632 /* This is a blend, not a permute. Elements must stay in their
42633 respective lanes. */
42634 for (i = 0; i < nelt; ++i)
42636 unsigned e = d->perm[i];
42637 if (!(e == i || e == i + nelt))
42638 return false;
42641 if (d->testing_p)
42642 return true;
42644 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
42645 decision should be extracted elsewhere, so that we only try that
42646 sequence once all budget==3 options have been tried. */
42647 target = d->target;
42648 op0 = d->op0;
42649 op1 = d->op1;
42650 mask = 0;
42652 switch (vmode)
42654 case V4DFmode:
42655 case V8SFmode:
42656 case V2DFmode:
42657 case V4SFmode:
42658 case V8HImode:
42659 case V8SImode:
42660 for (i = 0; i < nelt; ++i)
42661 mask |= (d->perm[i] >= nelt) << i;
42662 break;
42664 case V2DImode:
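/* Blended via pblendw on V8HImode: each 64-bit element maps to four mask
   bits, and each 32-bit element in the V4SImode case below maps to two.  */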
42665 for (i = 0; i < 2; ++i)
42666 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
42667 vmode = V8HImode;
42668 goto do_subreg;
42670 case V4SImode:
42671 for (i = 0; i < 4; ++i)
42672 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
42673 vmode = V8HImode;
42674 goto do_subreg;
42676 case V16QImode:
42677 /* See if bytes move in pairs so we can use pblendw with
42678 an immediate argument, rather than pblendvb with a vector
42679 argument. */
42680 for (i = 0; i < 16; i += 2)
42681 if (d->perm[i] + 1 != d->perm[i + 1])
42683 use_pblendvb:
42684 for (i = 0; i < nelt; ++i)
42685 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
42687 finish_pblendvb:
42688 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
42689 vperm = force_reg (vmode, vperm);
42691 if (GET_MODE_SIZE (vmode) == 16)
42692 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
42693 else
42694 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
42695 if (target != d->target)
42696 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42697 return true;
42700 for (i = 0; i < 8; ++i)
42701 mask |= (d->perm[i * 2] >= 16) << i;
42702 vmode = V8HImode;
42703 /* FALLTHRU */
42705 do_subreg:
42706 target = gen_reg_rtx (vmode);
42707 op0 = gen_lowpart (vmode, op0);
42708 op1 = gen_lowpart (vmode, op1);
42709 break;
42711 case V32QImode:
42712 /* See if bytes move in pairs. If not, vpblendvb must be used. */
42713 for (i = 0; i < 32; i += 2)
42714 if (d->perm[i] + 1 != d->perm[i + 1])
42715 goto use_pblendvb;
42716 /* See if bytes move in quadruplets. If yes, vpblendd
42717 with immediate can be used. */
42718 for (i = 0; i < 32; i += 4)
42719 if (d->perm[i] + 2 != d->perm[i + 2])
42720 break;
42721 if (i < 32)
42723 /* See if bytes move the same in both lanes. If yes,
42724 vpblendw with immediate can be used. */
42725 for (i = 0; i < 16; i += 2)
42726 if (d->perm[i] + 16 != d->perm[i + 16])
42727 goto use_pblendvb;
42729 /* Use vpblendw. */
42730 for (i = 0; i < 16; ++i)
42731 mask |= (d->perm[i * 2] >= 32) << i;
42732 vmode = V16HImode;
42733 goto do_subreg;
42736 /* Use vpblendd. */
42737 for (i = 0; i < 8; ++i)
42738 mask |= (d->perm[i * 4] >= 32) << i;
42739 vmode = V8SImode;
42740 goto do_subreg;
42742 case V16HImode:
42743 /* See if words move in pairs. If yes, vpblendd can be used. */
42744 for (i = 0; i < 16; i += 2)
42745 if (d->perm[i] + 1 != d->perm[i + 1])
42746 break;
42747 if (i < 16)
42749 /* See if words move the same in both lanes. If not,
42750 vpblendvb must be used. */
42751 for (i = 0; i < 8; i++)
42752 if (d->perm[i] + 8 != d->perm[i + 8])
42754 /* Use vpblendvb. */
42755 for (i = 0; i < 32; ++i)
42756 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
42758 vmode = V32QImode;
42759 nelt = 32;
42760 target = gen_reg_rtx (vmode);
42761 op0 = gen_lowpart (vmode, op0);
42762 op1 = gen_lowpart (vmode, op1);
42763 goto finish_pblendvb;
42766 /* Use vpblendw. */
42767 for (i = 0; i < 16; ++i)
42768 mask |= (d->perm[i] >= 16) << i;
42769 break;
42772 /* Use vpblendd. */
42773 for (i = 0; i < 8; ++i)
42774 mask |= (d->perm[i * 2] >= 16) << i;
42775 vmode = V8SImode;
42776 goto do_subreg;
42778 case V4DImode:
42779 /* Use vpblendd. */
42780 for (i = 0; i < 4; ++i)
42781 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
42782 vmode = V8SImode;
42783 goto do_subreg;
42785 default:
42786 gcc_unreachable ();
42789 /* This matches five different patterns with the different modes. */
42790 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
42791 x = gen_rtx_SET (VOIDmode, target, x);
42792 emit_insn (x);
42793 if (target != d->target)
42794 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42796 return true;
42799 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
42800 in terms of the variable form of vpermilps.
42802 Note that we will have already failed the immediate input vpermilps,
42803 which requires that the high and low part shuffle be identical; the
42804 variable form doesn't require that. */
42806 static bool
42807 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
42809 rtx rperm[8], vperm;
42810 unsigned i;
42812 if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
42813 return false;
42815 /* We can only permute within the 128-bit lane. */
42816 for (i = 0; i < 8; ++i)
42818 unsigned e = d->perm[i];
42819 if (i < 4 ? e >= 4 : e < 4)
42820 return false;
42823 if (d->testing_p)
42824 return true;
42826 for (i = 0; i < 8; ++i)
42828 unsigned e = d->perm[i];
42830 /* Within each 128-bit lane, the elements of op0 are numbered
42831 from 0 and the elements of op1 are numbered from 4. */
42832 if (e >= 8 + 4)
42833 e -= 8;
42834 else if (e >= 4)
42835 e -= 4;
42837 rperm[i] = GEN_INT (e);
42840 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
42841 vperm = force_reg (V8SImode, vperm);
42842 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
42844 return true;
42847 /* Return true if permutation D can be performed as VMODE permutation
42848 instead. */
42850 static bool
42851 valid_perm_using_mode_p (enum machine_mode vmode, struct expand_vec_perm_d *d)
42853 unsigned int i, j, chunk;
42855 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
42856 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
42857 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
42858 return false;
42860 if (GET_MODE_NUNITS (vmode) >= d->nelt)
42861 return true;
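/* Otherwise each group of CHUNK consecutive elements must start at a
   CHUNK-aligned source index and copy CHUNK consecutive source elements,
   so that the permutation is also expressible on the wider elements of
   VMODE.  */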
42863 chunk = d->nelt / GET_MODE_NUNITS (vmode);
42864 for (i = 0; i < d->nelt; i += chunk)
42865 if (d->perm[i] & (chunk - 1))
42866 return false;
42867 else
42868 for (j = 1; j < chunk; ++j)
42869 if (d->perm[i] + j != d->perm[i + j])
42870 return false;
42872 return true;
42875 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
42876 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
42878 static bool
42879 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
42881 unsigned i, nelt, eltsz, mask;
42882 unsigned char perm[32];
42883 enum machine_mode vmode = V16QImode;
42884 rtx rperm[32], vperm, target, op0, op1;
42886 nelt = d->nelt;
42888 if (!d->one_operand_p)
42890 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
42892 if (TARGET_AVX2
42893 && valid_perm_using_mode_p (V2TImode, d))
42895 if (d->testing_p)
42896 return true;
42898 /* Use vperm2i128 insn. The pattern uses
42899 V4DImode instead of V2TImode. */
42900 target = d->target;
42901 if (d->vmode != V4DImode)
42902 target = gen_reg_rtx (V4DImode);
42903 op0 = gen_lowpart (V4DImode, d->op0);
42904 op1 = gen_lowpart (V4DImode, d->op1);
42905 rperm[0]
42906 = GEN_INT ((d->perm[0] / (nelt / 2))
42907 | ((d->perm[nelt / 2] / (nelt / 2)) * 16));
42908 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
42909 if (target != d->target)
42910 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42911 return true;
42913 return false;
42916 else
42918 if (GET_MODE_SIZE (d->vmode) == 16)
42920 if (!TARGET_SSSE3)
42921 return false;
42923 else if (GET_MODE_SIZE (d->vmode) == 32)
42925 if (!TARGET_AVX2)
42926 return false;
42928 /* V4DImode should already have been handled through
42929 expand_vselect by the vpermq instruction. */
42930 gcc_assert (d->vmode != V4DImode);
42932 vmode = V32QImode;
42933 if (d->vmode == V8SImode
42934 || d->vmode == V16HImode
42935 || d->vmode == V32QImode)
42937 /* First see if vpermq can be used for
42938 V8SImode/V16HImode/V32QImode. */
42939 if (valid_perm_using_mode_p (V4DImode, d))
42941 for (i = 0; i < 4; i++)
42942 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
42943 if (d->testing_p)
42944 return true;
42945 target = gen_reg_rtx (V4DImode);
42946 if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
42947 perm, 4, false))
42949 emit_move_insn (d->target,
42950 gen_lowpart (d->vmode, target));
42951 return true;
42953 return false;
42956 /* Next see if vpermd can be used. */
42957 if (valid_perm_using_mode_p (V8SImode, d))
42958 vmode = V8SImode;
42960 /* Or if vpermps can be used. */
42961 else if (d->vmode == V8SFmode)
42962 vmode = V8SImode;
42964 if (vmode == V32QImode)
42966 /* vpshufb only works intra-lane; it is not
42967 possible to shuffle bytes between the lanes. */
42968 for (i = 0; i < nelt; ++i)
42969 if ((d->perm[i] ^ i) & (nelt / 2))
42970 return false;
42973 else
42974 return false;
42977 if (d->testing_p)
42978 return true;
42980 if (vmode == V8SImode)
42981 for (i = 0; i < 8; ++i)
42982 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
42983 else
42985 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
42986 if (!d->one_operand_p)
42987 mask = 2 * nelt - 1;
42988 else if (vmode == V16QImode)
42989 mask = nelt - 1;
42990 else
42991 mask = nelt / 2 - 1;
42993 for (i = 0; i < nelt; ++i)
42995 unsigned j, e = d->perm[i] & mask;
42996 for (j = 0; j < eltsz; ++j)
42997 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
43001 vperm = gen_rtx_CONST_VECTOR (vmode,
43002 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
43003 vperm = force_reg (vmode, vperm);
43005 target = d->target;
43006 if (d->vmode != vmode)
43007 target = gen_reg_rtx (vmode);
43008 op0 = gen_lowpart (vmode, d->op0);
43009 if (d->one_operand_p)
43011 if (vmode == V16QImode)
43012 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
43013 else if (vmode == V32QImode)
43014 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
43015 else if (vmode == V8SFmode)
43016 emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
43017 else
43018 emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
43020 else
43022 op1 = gen_lowpart (vmode, d->op1);
43023 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
43025 if (target != d->target)
43026 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
43028 return true;
43031 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
43032 in a single instruction. */
43034 static bool
43035 expand_vec_perm_1 (struct expand_vec_perm_d *d)
43037 unsigned i, nelt = d->nelt;
43038 unsigned char perm2[MAX_VECT_LEN];
43040 /* Check plain VEC_SELECT first, because AVX has instructions that could
43041 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
43042 input where SEL+CONCAT may not. */
43043 if (d->one_operand_p)
43045 int mask = nelt - 1;
43046 bool identity_perm = true;
43047 bool broadcast_perm = true;
43049 for (i = 0; i < nelt; i++)
43051 perm2[i] = d->perm[i] & mask;
43052 if (perm2[i] != i)
43053 identity_perm = false;
43054 if (perm2[i])
43055 broadcast_perm = false;
43058 if (identity_perm)
43060 if (!d->testing_p)
43061 emit_move_insn (d->target, d->op0);
43062 return true;
43064 else if (broadcast_perm && TARGET_AVX2)
43066 /* Use vpbroadcast{b,w,d}. */
43067 rtx (*gen) (rtx, rtx) = NULL;
43068 switch (d->vmode)
43070 case V32QImode:
43071 gen = gen_avx2_pbroadcastv32qi_1;
43072 break;
43073 case V16HImode:
43074 gen = gen_avx2_pbroadcastv16hi_1;
43075 break;
43076 case V8SImode:
43077 gen = gen_avx2_pbroadcastv8si_1;
43078 break;
43079 case V16QImode:
43080 gen = gen_avx2_pbroadcastv16qi;
43081 break;
43082 case V8HImode:
43083 gen = gen_avx2_pbroadcastv8hi;
43084 break;
43085 case V8SFmode:
43086 gen = gen_avx2_vec_dupv8sf_1;
43087 break;
43088 /* For other modes prefer other shuffles this function creates. */
43089 default: break;
43091 if (gen != NULL)
43093 if (!d->testing_p)
43094 emit_insn (gen (d->target, d->op0));
43095 return true;
43099 if (expand_vselect (d->target, d->op0, perm2, nelt, d->testing_p))
43100 return true;
43102 /* There are plenty of patterns in sse.md that are written for
43103 SEL+CONCAT and are not replicated for a single op. Perhaps
43104 that should be changed, to avoid the nastiness here. */
43106 /* Recognize interleave style patterns, which means incrementing
43107 every other permutation operand. */
43108 for (i = 0; i < nelt; i += 2)
43110 perm2[i] = d->perm[i] & mask;
43111 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
43113 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
43114 d->testing_p))
43115 return true;
43117 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
43118 if (nelt >= 4)
43120 for (i = 0; i < nelt; i += 4)
43122 perm2[i + 0] = d->perm[i + 0] & mask;
43123 perm2[i + 1] = d->perm[i + 1] & mask;
43124 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
43125 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
43128 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
43129 d->testing_p))
43130 return true;
43134 /* Finally, try the fully general two operand permute. */
43135 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
43136 d->testing_p))
43137 return true;
43139 /* Recognize interleave style patterns with reversed operands. */
43140 if (!d->one_operand_p)
43142 for (i = 0; i < nelt; ++i)
43144 unsigned e = d->perm[i];
43145 if (e >= nelt)
43146 e -= nelt;
43147 else
43148 e += nelt;
43149 perm2[i] = e;
43152 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt,
43153 d->testing_p))
43154 return true;
43157 /* Try the SSE4.1 blend variable merge instructions. */
43158 if (expand_vec_perm_blend (d))
43159 return true;
43161 /* Try one of the AVX vpermil variable permutations. */
43162 if (expand_vec_perm_vpermil (d))
43163 return true;
43165 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
43166 vpshufb, vpermd, vpermps or vpermq variable permutation. */
43167 if (expand_vec_perm_pshufb (d))
43168 return true;
43170 /* Try the AVX512F vpermi2 instructions. */
43171 rtx vec[64];
43172 enum machine_mode mode = d->vmode;
43173 if (mode == V8DFmode)
43174 mode = V8DImode;
43175 else if (mode == V16SFmode)
43176 mode = V16SImode;
43177 for (i = 0; i < nelt; ++i)
43178 vec[i] = GEN_INT (d->perm[i]);
43179 rtx mask = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt, vec));
43180 if (ix86_expand_vec_perm_vpermi2 (d->target, d->op0, mask, d->op1))
43181 return true;
43183 return false;
43186 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
43187 in terms of a pair of pshuflw + pshufhw instructions. */
43189 static bool
43190 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
43192 unsigned char perm2[MAX_VECT_LEN];
43193 unsigned i;
43194 bool ok;
43196 if (d->vmode != V8HImode || !d->one_operand_p)
43197 return false;
43199 /* The two permutations only operate in 64-bit lanes. */
43200 for (i = 0; i < 4; ++i)
43201 if (d->perm[i] >= 4)
43202 return false;
43203 for (i = 4; i < 8; ++i)
43204 if (d->perm[i] < 4)
43205 return false;
43207 if (d->testing_p)
43208 return true;
43210 /* Emit the pshuflw. */
43211 memcpy (perm2, d->perm, 4);
43212 for (i = 4; i < 8; ++i)
43213 perm2[i] = i;
43214 ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
43215 gcc_assert (ok);
43217 /* Emit the pshufhw. */
43218 memcpy (perm2 + 4, d->perm + 4, 4);
43219 for (i = 0; i < 4; ++i)
43220 perm2[i] = i;
43221 ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
43222 gcc_assert (ok);
43224 return true;
43227 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43228 the permutation using the SSSE3 palignr instruction. This succeeds
43229 when all of the elements in PERM fit within one vector and we merely
43230 need to shift them down so that a single vector permutation has a
43231 chance to succeed. */
43233 static bool
43234 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
43236 unsigned i, nelt = d->nelt;
43237 unsigned min, max;
43238 bool in_order, ok;
43239 rtx shift, target;
43240 struct expand_vec_perm_d dcopy;
43242 /* Even with AVX, palignr only operates on 128-bit vectors. */
43243 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
43244 return false;
43246 min = nelt, max = 0;
43247 for (i = 0; i < nelt; ++i)
43249 unsigned e = d->perm[i];
43250 if (e < min)
43251 min = e;
43252 if (e > max)
43253 max = e;
43255 if (min == 0 || max - min >= nelt)
43256 return false;
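/* All selected elements must lie within a window of NELT consecutive
   indices of the concatenated operands, so that shifting down by MIN makes
   a single-operand permutation possible; MIN == 0 is rejected since no
   shift would help then.  */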
43258 /* Given that we have SSSE3, we know we'll be able to implement the
43259 single operand permutation after the palignr with pshufb. */
43260 if (d->testing_p)
43261 return true;
43263 dcopy = *d;
43264 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
43265 target = gen_reg_rtx (TImode);
43266 emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, d->op1),
43267 gen_lowpart (TImode, d->op0), shift));
43269 dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
43270 dcopy.one_operand_p = true;
43272 in_order = true;
43273 for (i = 0; i < nelt; ++i)
43275 unsigned e = dcopy.perm[i] - min;
43276 if (e != i)
43277 in_order = false;
43278 dcopy.perm[i] = e;
43281 /* Test for the degenerate case where the alignment by itself
43282 produces the desired permutation. */
43283 if (in_order)
43285 emit_move_insn (d->target, dcopy.op0);
43286 return true;
43289 ok = expand_vec_perm_1 (&dcopy);
43290 gcc_assert (ok);
43292 return ok;
43295 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
43296 the permutation using the SSE4_1 pblendv instruction. Potentially
43297 reduces the permutation from 2 pshufb insns and an ior to 1 pshufb and a pblendv. */
43299 static bool
43300 expand_vec_perm_pblendv (struct expand_vec_perm_d *d)
43302 unsigned i, which, nelt = d->nelt;
43303 struct expand_vec_perm_d dcopy, dcopy1;
43304 enum machine_mode vmode = d->vmode;
43305 bool ok;
43307 /* Use the same checks as in expand_vec_perm_blend, but skipping
43308 AVX and AVX2 as they require more than 2 instructions. */
43309 if (d->one_operand_p)
43310 return false;
43311 if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
43313 else
43314 return false;
43316 /* Figure out which permutation elements do not stay in their
43317 respective lanes. */
43318 for (i = 0, which = 0; i < nelt; ++i)
43320 unsigned e = d->perm[i];
43321 if (e != i)
43322 which |= (e < nelt ? 1 : 2);
43324 /* We can pblend the part where elements do not stay in their
43325 respective lanes only when these elements all come from one
43326 half of the permutation.
43327 {0 1 8 3 4 5 9 7} is ok: 8 and 9 are not in their respective
43328 lanes, but both are >= 8.
43329 {0 1 8 3 4 5 2 7} is not ok: 2 and 8 are not in their
43330 respective lanes, and 8 >= 8 but 2 is not. */
43331 if (which != 1 && which != 2)
43332 return false;
43333 if (d->testing_p)
43334 return true;
43336 /* First we apply a one-operand permutation to the part where
43337 elements do not stay in their respective lanes. */
43338 dcopy = *d;
43339 if (which == 2)
43340 dcopy.op0 = dcopy.op1 = d->op1;
43341 else
43342 dcopy.op0 = dcopy.op1 = d->op0;
43343 dcopy.one_operand_p = true;
43345 for (i = 0; i < nelt; ++i)
43346 dcopy.perm[i] = d->perm[i] & (nelt - 1);
43348 ok = expand_vec_perm_1 (&dcopy);
43349 gcc_assert (ok);
43351 /* Next we put permuted elements into their positions. */
43352 dcopy1 = *d;
43353 if (which == 2)
43354 dcopy1.op1 = dcopy.target;
43355 else
43356 dcopy1.op0 = dcopy.target;
43358 for (i = 0; i < nelt; ++i)
43359 dcopy1.perm[i] = ((d->perm[i] >= nelt) ? (nelt + i) : i);
43361 ok = expand_vec_perm_blend (&dcopy1);
43362 gcc_assert (ok);
43364 return true;
43367 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
43369 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43370 a two vector permutation into a single vector permutation by using
43371 an interleave operation to merge the vectors. */
43373 static bool
43374 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
43376 struct expand_vec_perm_d dremap, dfinal;
43377 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
43378 unsigned HOST_WIDE_INT contents;
43379 unsigned char remap[2 * MAX_VECT_LEN];
43380 rtx_insn *seq;
43381 bool ok, same_halves = false;
43383 if (GET_MODE_SIZE (d->vmode) == 16)
43385 if (d->one_operand_p)
43386 return false;
43388 else if (GET_MODE_SIZE (d->vmode) == 32)
43390 if (!TARGET_AVX)
43391 return false;
43392 /* For 32-byte modes allow even d->one_operand_p.
43393 The lack of cross-lane shuffling in some instructions
43394 might prevent a single insn shuffle. */
43395 dfinal = *d;
43396 dfinal.testing_p = true;
43397 /* If expand_vec_perm_interleave3 can expand this into
43398 a 3 insn sequence, give up and let it be expanded as
43399 a 3 insn sequence. While that is one insn longer,
43400 it doesn't need a memory operand, and in the common
43401 case where both the interleave low and high permutations
43402 with the same operands are adjacent, it needs only 4 insns
43403 for both after CSE. */
43404 if (expand_vec_perm_interleave3 (&dfinal))
43405 return false;
43407 else
43408 return false;
43410 /* Examine from whence the elements come. */
43411 contents = 0;
43412 for (i = 0; i < nelt; ++i)
43413 contents |= ((unsigned HOST_WIDE_INT) 1) << d->perm[i];
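/* CONTENTS is a bitmask over the 2*NELT source element indices: bit E is
   set when some result element selects source element E.  */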
43415 memset (remap, 0xff, sizeof (remap));
43416 dremap = *d;
43418 if (GET_MODE_SIZE (d->vmode) == 16)
43420 unsigned HOST_WIDE_INT h1, h2, h3, h4;
43422 /* Split the two input vectors into 4 halves. */
43423 h1 = (((unsigned HOST_WIDE_INT) 1) << nelt2) - 1;
43424 h2 = h1 << nelt2;
43425 h3 = h2 << nelt2;
43426 h4 = h3 << nelt2;
43428 /* If the elements all come from the low halves, use interleave low, and
43429 similarly use interleave high for the high halves. If the elements are from
43430 mis-matched halves, we can use shufps for V4SF/V4SI or do a DImode shuffle. */
43431 if ((contents & (h1 | h3)) == contents)
43433 /* punpckl* */
43434 for (i = 0; i < nelt2; ++i)
43436 remap[i] = i * 2;
43437 remap[i + nelt] = i * 2 + 1;
43438 dremap.perm[i * 2] = i;
43439 dremap.perm[i * 2 + 1] = i + nelt;
43441 if (!TARGET_SSE2 && d->vmode == V4SImode)
43442 dremap.vmode = V4SFmode;
43444 else if ((contents & (h2 | h4)) == contents)
43446 /* punpckh* */
43447 for (i = 0; i < nelt2; ++i)
43449 remap[i + nelt2] = i * 2;
43450 remap[i + nelt + nelt2] = i * 2 + 1;
43451 dremap.perm[i * 2] = i + nelt2;
43452 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
43454 if (!TARGET_SSE2 && d->vmode == V4SImode)
43455 dremap.vmode = V4SFmode;
43457 else if ((contents & (h1 | h4)) == contents)
43459 /* shufps */
43460 for (i = 0; i < nelt2; ++i)
43462 remap[i] = i;
43463 remap[i + nelt + nelt2] = i + nelt2;
43464 dremap.perm[i] = i;
43465 dremap.perm[i + nelt2] = i + nelt + nelt2;
43467 if (nelt != 4)
43469 /* shufpd */
43470 dremap.vmode = V2DImode;
43471 dremap.nelt = 2;
43472 dremap.perm[0] = 0;
43473 dremap.perm[1] = 3;
43476 else if ((contents & (h2 | h3)) == contents)
43478 /* shufps */
43479 for (i = 0; i < nelt2; ++i)
43481 remap[i + nelt2] = i;
43482 remap[i + nelt] = i + nelt2;
43483 dremap.perm[i] = i + nelt2;
43484 dremap.perm[i + nelt2] = i + nelt;
43486 if (nelt != 4)
43488 /* shufpd */
43489 dremap.vmode = V2DImode;
43490 dremap.nelt = 2;
43491 dremap.perm[0] = 1;
43492 dremap.perm[1] = 2;
43495 else
43496 return false;
43498 else
43500 unsigned int nelt4 = nelt / 4, nzcnt = 0;
43501 unsigned HOST_WIDE_INT q[8];
43502 unsigned int nonzero_halves[4];
43504 /* Split the two input vectors into 8 quarters. */
43505 q[0] = (((unsigned HOST_WIDE_INT) 1) << nelt4) - 1;
43506 for (i = 1; i < 8; ++i)
43507 q[i] = q[0] << (nelt4 * i);
43508 for (i = 0; i < 4; ++i)
43509 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
43511 nonzero_halves[nzcnt] = i;
43512 ++nzcnt;
43515 if (nzcnt == 1)
43517 gcc_assert (d->one_operand_p);
43518 nonzero_halves[1] = nonzero_halves[0];
43519 same_halves = true;
43521 else if (d->one_operand_p)
43523 gcc_assert (nonzero_halves[0] == 0);
43524 gcc_assert (nonzero_halves[1] == 1);
43527 if (nzcnt <= 2)
43529 if (d->perm[0] / nelt2 == nonzero_halves[1])
43531 /* Attempt to increase the likelihood that dfinal
43532 shuffle will be intra-lane. */
43533 char tmph = nonzero_halves[0];
43534 nonzero_halves[0] = nonzero_halves[1];
43535 nonzero_halves[1] = tmph;
43538 /* vperm2f128 or vperm2i128. */
43539 for (i = 0; i < nelt2; ++i)
43541 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
43542 remap[i + nonzero_halves[0] * nelt2] = i;
43543 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
43544 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
43547 if (d->vmode != V8SFmode
43548 && d->vmode != V4DFmode
43549 && d->vmode != V8SImode)
43551 dremap.vmode = V8SImode;
43552 dremap.nelt = 8;
43553 for (i = 0; i < 4; ++i)
43555 dremap.perm[i] = i + nonzero_halves[0] * 4;
43556 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
43560 else if (d->one_operand_p)
43561 return false;
43562 else if (TARGET_AVX2
43563 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
43565 /* vpunpckl* */
43566 for (i = 0; i < nelt4; ++i)
43568 remap[i] = i * 2;
43569 remap[i + nelt] = i * 2 + 1;
43570 remap[i + nelt2] = i * 2 + nelt2;
43571 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
43572 dremap.perm[i * 2] = i;
43573 dremap.perm[i * 2 + 1] = i + nelt;
43574 dremap.perm[i * 2 + nelt2] = i + nelt2;
43575 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
43578 else if (TARGET_AVX2
43579 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
43581 /* vpunpckh* */
43582 for (i = 0; i < nelt4; ++i)
43584 remap[i + nelt4] = i * 2;
43585 remap[i + nelt + nelt4] = i * 2 + 1;
43586 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
43587 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
43588 dremap.perm[i * 2] = i + nelt4;
43589 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
43590 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
43591 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
43594 else
43595 return false;
43598 /* Use the remapping array set up above to move the elements from their
43599 swizzled locations into their final destinations. */
43600 dfinal = *d;
43601 for (i = 0; i < nelt; ++i)
43603 unsigned e = remap[d->perm[i]];
43604 gcc_assert (e < nelt);
43605 /* If same_halves is true, both halves of the remapped vector are the
43606 same. Avoid cross-lane accesses if possible. */
43607 if (same_halves && i >= nelt2)
43609 gcc_assert (e < nelt2);
43610 dfinal.perm[i] = e + nelt2;
43612 else
43613 dfinal.perm[i] = e;
43615 if (!d->testing_p)
43617 dremap.target = gen_reg_rtx (dremap.vmode);
43618 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
43620 dfinal.op1 = dfinal.op0;
43621 dfinal.one_operand_p = true;
43623 /* Test if the final remap can be done with a single insn. For V4SFmode or
43624 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
43625 start_sequence ();
43626 ok = expand_vec_perm_1 (&dfinal);
43627 seq = get_insns ();
43628 end_sequence ();
43630 if (!ok)
43631 return false;
43633 if (d->testing_p)
43634 return true;
43636 if (dremap.vmode != dfinal.vmode)
43638 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
43639 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
43642 ok = expand_vec_perm_1 (&dremap);
43643 gcc_assert (ok);
43645 emit_insn (seq);
43646 return true;
43649 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43650 a single vector cross-lane permutation into vpermq followed
43651 by any of the single insn permutations. */
43653 static bool
43654 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
43656 struct expand_vec_perm_d dremap, dfinal;
43657 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
43658 unsigned contents[2];
43659 bool ok;
43661 if (!(TARGET_AVX2
43662 && (d->vmode == V32QImode || d->vmode == V16HImode)
43663 && d->one_operand_p))
43664 return false;
43666 contents[0] = 0;
43667 contents[1] = 0;
43668 for (i = 0; i < nelt2; ++i)
43670 contents[0] |= 1u << (d->perm[i] / nelt4);
43671 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
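/* contents[I] is a 4-bit mask of which 64-bit quarters of the operand feed
   half I of the result.  Each 128-bit half of the vpermq result can hold
   only two quarters, so give up if more than two distinct quarters are
   needed for either half.  */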
43674 for (i = 0; i < 2; ++i)
43676 unsigned int cnt = 0;
43677 for (j = 0; j < 4; ++j)
43678 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
43679 return false;
43682 if (d->testing_p)
43683 return true;
43685 dremap = *d;
43686 dremap.vmode = V4DImode;
43687 dremap.nelt = 4;
43688 dremap.target = gen_reg_rtx (V4DImode);
43689 dremap.op0 = gen_lowpart (V4DImode, d->op0);
43690 dremap.op1 = dremap.op0;
43691 dremap.one_operand_p = true;
43692 for (i = 0; i < 2; ++i)
43694 unsigned int cnt = 0;
43695 for (j = 0; j < 4; ++j)
43696 if ((contents[i] & (1u << j)) != 0)
43697 dremap.perm[2 * i + cnt++] = j;
43698 for (; cnt < 2; ++cnt)
43699 dremap.perm[2 * i + cnt] = 0;
43702 dfinal = *d;
43703 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
43704 dfinal.op1 = dfinal.op0;
43705 dfinal.one_operand_p = true;
43706 for (i = 0, j = 0; i < nelt; ++i)
43708 if (i == nelt2)
43709 j = 2;
43710 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
43711 if ((d->perm[i] / nelt4) == dremap.perm[j])
43713 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
43714 dfinal.perm[i] |= nelt4;
43715 else
43716 gcc_unreachable ();
43719 ok = expand_vec_perm_1 (&dremap);
43720 gcc_assert (ok);
43722 ok = expand_vec_perm_1 (&dfinal);
43723 gcc_assert (ok);
43725 return true;
43728 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to expand
43729 a vector permutation using two instructions, vperm2f128 (or
43730 vperm2i128) followed by any single in-lane permutation. */
43732 static bool
43733 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
43735 struct expand_vec_perm_d dfirst, dsecond;
43736 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
43737 bool ok;
43739 if (!TARGET_AVX
43740 || GET_MODE_SIZE (d->vmode) != 32
43741 || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
43742 return false;
43744 dsecond = *d;
43745 dsecond.one_operand_p = false;
43746 dsecond.testing_p = true;
43748 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
43749 immediate. For perm < 16 the second permutation uses
43750 d->op0 as first operand, for perm >= 16 it uses d->op1
43751 as first operand. The second operand is the result of
43752 vperm2[fi]128. */
43753 for (perm = 0; perm < 32; perm++)
43755 /* Ignore permutations which do not move anything cross-lane. */
43756 if (perm < 16)
43758 /* The second shuffle for e.g. V4DFmode has
43759 0123 and ABCD operands.
43760 Ignore AB23, as 23 is already in the second lane
43761 of the first operand. */
43762 if ((perm & 0xc) == (1 << 2)) continue;
43763 /* And 01CD, as 01 is in the first lane of the first
43764 operand. */
43765 if ((perm & 3) == 0) continue;
43766 /* And 4567, as then the vperm2[fi]128 doesn't change
43767 anything on the original 4567 second operand. */
43768 if ((perm & 0xf) == ((3 << 2) | 2)) continue;
43770 else
43772 /* The second shuffle for e.g. V4DFmode has
43773 4567 and ABCD operands.
43774 Ignore AB67, as 67 is already in the second lane
43775 of the first operand. */
43776 if ((perm & 0xc) == (3 << 2)) continue;
43777 /* And 45CD, as 45 is in the first lane of the first
43778 operand. */
43779 if ((perm & 3) == 2) continue;
43780 /* And 0123, as then the vperm2[fi]128 doesn't change
43781 anything on the original 0123 first operand. */
43782 if ((perm & 0xf) == (1 << 2)) continue;
43785 for (i = 0; i < nelt; i++)
43787 j = d->perm[i] / nelt2;
43788 if (j == ((perm >> (2 * (i >= nelt2))) & 3))
43789 dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
43790 else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
43791 dsecond.perm[i] = d->perm[i] & (nelt - 1);
43792 else
43793 break;
43796 if (i == nelt)
43798 start_sequence ();
43799 ok = expand_vec_perm_1 (&dsecond);
43800 end_sequence ();
43802 else
43803 ok = false;
43805 if (ok)
43807 if (d->testing_p)
43808 return true;
43810 /* Found a usable second shuffle. dfirst will be
43811 vperm2f128 on d->op0 and d->op1. */
43812 dsecond.testing_p = false;
43813 dfirst = *d;
43814 dfirst.target = gen_reg_rtx (d->vmode);
43815 for (i = 0; i < nelt; i++)
43816 dfirst.perm[i] = (i & (nelt2 - 1))
43817 + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
43819 ok = expand_vec_perm_1 (&dfirst);
43820 gcc_assert (ok);
43822 /* And dsecond is some single insn shuffle, taking
43823 d->op0 and result of vperm2f128 (if perm < 16) or
43824 d->op1 and result of vperm2f128 (otherwise). */
43825 dsecond.op1 = dfirst.target;
43826 if (perm >= 16)
43827 dsecond.op0 = dfirst.op1;
43829 ok = expand_vec_perm_1 (&dsecond);
43830 gcc_assert (ok);
43832 return true;
43835 /* For one operand, the only useful vperm2f128 permutation is 0x10. */
43836 if (d->one_operand_p)
43837 return false;
43840 return false;
43843 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43844 a two vector permutation using 2 intra-lane interleave insns
43845 and cross-lane shuffle for 32-byte vectors. */
43847 static bool
43848 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
43850 unsigned i, nelt;
43851 rtx (*gen) (rtx, rtx, rtx);
43853 if (d->one_operand_p)
43854 return false;
43855 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
43857 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
43859 else
43860 return false;
43862 nelt = d->nelt;
43863 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
43864 return false;
43865 for (i = 0; i < nelt; i += 2)
43866 if (d->perm[i] != d->perm[0] + i / 2
43867 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
43868 return false;
43870 if (d->testing_p)
43871 return true;
43873 switch (d->vmode)
43875 case V32QImode:
43876 if (d->perm[0])
43877 gen = gen_vec_interleave_highv32qi;
43878 else
43879 gen = gen_vec_interleave_lowv32qi;
43880 break;
43881 case V16HImode:
43882 if (d->perm[0])
43883 gen = gen_vec_interleave_highv16hi;
43884 else
43885 gen = gen_vec_interleave_lowv16hi;
43886 break;
43887 case V8SImode:
43888 if (d->perm[0])
43889 gen = gen_vec_interleave_highv8si;
43890 else
43891 gen = gen_vec_interleave_lowv8si;
43892 break;
43893 case V4DImode:
43894 if (d->perm[0])
43895 gen = gen_vec_interleave_highv4di;
43896 else
43897 gen = gen_vec_interleave_lowv4di;
43898 break;
43899 case V8SFmode:
43900 if (d->perm[0])
43901 gen = gen_vec_interleave_highv8sf;
43902 else
43903 gen = gen_vec_interleave_lowv8sf;
43904 break;
43905 case V4DFmode:
43906 if (d->perm[0])
43907 gen = gen_vec_interleave_highv4df;
43908 else
43909 gen = gen_vec_interleave_lowv4df;
43910 break;
43911 default:
43912 gcc_unreachable ();
43915 emit_insn (gen (d->target, d->op0, d->op1));
43916 return true;
43919 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement
43920 a single vector permutation using a single intra-lane vector
43921 permutation, vperm2f128 swapping the lanes and vblend* insn blending
43922 the non-swapped and swapped vectors together. */
43924 static bool
43925 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
43927 struct expand_vec_perm_d dfirst, dsecond;
43928 unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
43929 rtx_insn *seq;
43930 bool ok;
43931 rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;
43933 if (!TARGET_AVX
43934 || TARGET_AVX2
43935 || (d->vmode != V8SFmode && d->vmode != V4DFmode)
43936 || !d->one_operand_p)
43937 return false;
43939 dfirst = *d;
43940 for (i = 0; i < nelt; i++)
43941 dfirst.perm[i] = 0xff;
43942 for (i = 0, msk = 0; i < nelt; i++)
43944 j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
43945 if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
43946 return false;
43947 dfirst.perm[j] = d->perm[i];
43948 if (j != i)
43949 msk |= (1 << i);
43951 for (i = 0; i < nelt; i++)
43952 if (dfirst.perm[i] == 0xff)
43953 dfirst.perm[i] = i;
43955 if (!d->testing_p)
43956 dfirst.target = gen_reg_rtx (dfirst.vmode);
43958 start_sequence ();
43959 ok = expand_vec_perm_1 (&dfirst);
43960 seq = get_insns ();
43961 end_sequence ();
43963 if (!ok)
43964 return false;
43966 if (d->testing_p)
43967 return true;
43969 emit_insn (seq);
43971 dsecond = *d;
43972 dsecond.op0 = dfirst.target;
43973 dsecond.op1 = dfirst.target;
43974 dsecond.one_operand_p = true;
43975 dsecond.target = gen_reg_rtx (dsecond.vmode);
43976 for (i = 0; i < nelt; i++)
43977 dsecond.perm[i] = i ^ nelt2;
43979 ok = expand_vec_perm_1 (&dsecond);
43980 gcc_assert (ok);
43982 blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
43983 emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
43984 return true;
43987 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement a V4DF
43988 permutation using two vperm2f128, followed by a vshufpd insn blending
43989 the two vectors together. */
43991 static bool
43992 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
43994 struct expand_vec_perm_d dfirst, dsecond, dthird;
43995 bool ok;
43997 if (!TARGET_AVX || (d->vmode != V4DFmode))
43998 return false;
44000 if (d->testing_p)
44001 return true;
44003 dfirst = *d;
44004 dsecond = *d;
44005 dthird = *d;
44007 dfirst.perm[0] = (d->perm[0] & ~1);
44008 dfirst.perm[1] = (d->perm[0] & ~1) + 1;
44009 dfirst.perm[2] = (d->perm[2] & ~1);
44010 dfirst.perm[3] = (d->perm[2] & ~1) + 1;
44011 dsecond.perm[0] = (d->perm[1] & ~1);
44012 dsecond.perm[1] = (d->perm[1] & ~1) + 1;
44013 dsecond.perm[2] = (d->perm[3] & ~1);
44014 dsecond.perm[3] = (d->perm[3] & ~1) + 1;
44015 dthird.perm[0] = (d->perm[0] % 2);
44016 dthird.perm[1] = (d->perm[1] % 2) + 4;
44017 dthird.perm[2] = (d->perm[2] % 2) + 2;
44018 dthird.perm[3] = (d->perm[3] % 2) + 6;
44020 dfirst.target = gen_reg_rtx (dfirst.vmode);
44021 dsecond.target = gen_reg_rtx (dsecond.vmode);
44022 dthird.op0 = dfirst.target;
44023 dthird.op1 = dsecond.target;
44024 dthird.one_operand_p = false;
44026 canonicalize_perm (&dfirst);
44027 canonicalize_perm (&dsecond);
44029 ok = expand_vec_perm_1 (&dfirst)
44030 && expand_vec_perm_1 (&dsecond)
44031 && expand_vec_perm_1 (&dthird);
44033 gcc_assert (ok);
44035 return true;
44038 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
44039 permutation with two pshufb insns and an ior. We should have already
44040 failed all two-instruction sequences. */
44042 static bool
44043 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
44045 rtx rperm[2][16], vperm, l, h, op, m128;
44046 unsigned int i, nelt, eltsz;
44048 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
44049 return false;
44050 gcc_assert (!d->one_operand_p);
44052 if (d->testing_p)
44053 return true;
44055 nelt = d->nelt;
44056 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
44058 /* Generate two permutation masks. If the required element is within
44059 the given vector it is shuffled into the proper lane. If the required
44060 element is in the other vector, force a zero into the lane by setting
44061 bit 7 in the permutation mask. */
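/* Worked example, added for exposition and not part of the original source:
   for V8HImode, nelt == 8 and eltsz == 2, so a two-operand selector of
   { 0 9 2 11 4 13 6 15 } yields
     rperm[0] = { 0 1 -128 -128 4 5 -128 -128 8 9 -128 -128 12 13 -128 -128 }
     rperm[1] = { -128 -128 2 3 -128 -128 6 7 -128 -128 10 11 -128 -128 14 15 }
   i.e. each pshufb zeroes the byte positions that the other operand
   supplies, and the final ior merges the two results.  */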
44062 m128 = GEN_INT (-128);
44063 for (i = 0; i < nelt; ++i)
44065 unsigned j, e = d->perm[i];
44066 unsigned which = (e >= nelt);
44067 if (e >= nelt)
44068 e -= nelt;
44070 for (j = 0; j < eltsz; ++j)
44072 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
44073 rperm[1-which][i*eltsz + j] = m128;
44077 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
44078 vperm = force_reg (V16QImode, vperm);
44080 l = gen_reg_rtx (V16QImode);
44081 op = gen_lowpart (V16QImode, d->op0);
44082 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
44084 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
44085 vperm = force_reg (V16QImode, vperm);
44087 h = gen_reg_rtx (V16QImode);
44088 op = gen_lowpart (V16QImode, d->op1);
44089 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
44091 op = d->target;
44092 if (d->vmode != V16QImode)
44093 op = gen_reg_rtx (V16QImode);
44094 emit_insn (gen_iorv16qi3 (op, l, h));
44095 if (op != d->target)
44096 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
44098 return true;
44101 /* Implement arbitrary permutation of one V32QImode and V16HImode operand
44102 with two vpshufb insns, vpermq and vpor. We should have already failed
44103 all two- or three-instruction sequences. */
44105 static bool
44106 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
44108 rtx rperm[2][32], vperm, l, h, hp, op, m128;
44109 unsigned int i, nelt, eltsz;
44111 if (!TARGET_AVX2
44112 || !d->one_operand_p
44113 || (d->vmode != V32QImode && d->vmode != V16HImode))
44114 return false;
44116 if (d->testing_p)
44117 return true;
44119 nelt = d->nelt;
44120 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
44122 /* Generate two permutation masks. If the required element is within
44123 the same lane, it is shuffled in. If the required element is from the
44124 other lane, force a zero by setting bit 7 in the permutation mask.
44125 The other mask has non-negative elements where an element is
44126 requested from the other lane, but the element is also moved to the
44127 other lane, so that the result of vpshufb can have its two V2TImode
44128 halves swapped. */
44129 m128 = GEN_INT (-128);
44130 for (i = 0; i < nelt; ++i)
44132 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
44133 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
44135 for (j = 0; j < eltsz; ++j)
44137 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
44138 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
44142 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
44143 vperm = force_reg (V32QImode, vperm);
44145 h = gen_reg_rtx (V32QImode);
44146 op = gen_lowpart (V32QImode, d->op0);
44147 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
44149 /* Swap the 128-bit lanes of h into hp. */
44150 hp = gen_reg_rtx (V4DImode);
44151 op = gen_lowpart (V4DImode, h);
44152 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
44153 const1_rtx));
44155 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
44156 vperm = force_reg (V32QImode, vperm);
44158 l = gen_reg_rtx (V32QImode);
44159 op = gen_lowpart (V32QImode, d->op0);
44160 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
44162 op = d->target;
44163 if (d->vmode != V32QImode)
44164 op = gen_reg_rtx (V32QImode);
44165 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
44166 if (op != d->target)
44167 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
44169 return true;
44172 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
44173 and extract-odd permutations of two V32QImode and V16HImode operands
44174 with two vpshufb insns, vpor and vpermq. We should have already
44175 failed all two- or three-instruction sequences. */
44177 static bool
44178 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
44180 rtx rperm[2][32], vperm, l, h, ior, op, m128;
44181 unsigned int i, nelt, eltsz;
44183 if (!TARGET_AVX2
44184 || d->one_operand_p
44185 || (d->vmode != V32QImode && d->vmode != V16HImode))
44186 return false;
44188 for (i = 0; i < d->nelt; ++i)
44189 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
44190 return false;
44192 if (d->testing_p)
44193 return true;
44195 nelt = d->nelt;
44196 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
44198 /* Generate two permutation masks. In the first permutation mask
44199 the first quarter will contain indexes for the first half
44200 of op0, the second quarter will contain bit 7 set, the third quarter
44201 will contain indexes for the second half of op0 and the
44202 last quarter bit 7 set. In the second permutation mask
44203 the first quarter will contain bit 7 set, the second quarter
44204 indexes for the first half of op1, the third quarter bit 7 set
44205 and the last quarter indexes for the second half of op1.
44206 I.e. the first mask e.g. for V32QImode extract even will be:
44207 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
44208 (all values masked with 0xf except for -128) and second mask
44209 for extract even will be
44210 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
44211 m128 = GEN_INT (-128);
44212 for (i = 0; i < nelt; ++i)
44214 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
44215 unsigned which = d->perm[i] >= nelt;
44216 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
44218 for (j = 0; j < eltsz; ++j)
44220 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
44221 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
44225 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
44226 vperm = force_reg (V32QImode, vperm);
44228 l = gen_reg_rtx (V32QImode);
44229 op = gen_lowpart (V32QImode, d->op0);
44230 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
44232 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
44233 vperm = force_reg (V32QImode, vperm);
44235 h = gen_reg_rtx (V32QImode);
44236 op = gen_lowpart (V32QImode, d->op1);
44237 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
44239 ior = gen_reg_rtx (V32QImode);
44240 emit_insn (gen_iorv32qi3 (ior, l, h));
44242 /* Permute the V4DImode quarters using the { 0, 2, 1, 3 } permutation. */
44243 op = gen_reg_rtx (V4DImode);
44244 ior = gen_lowpart (V4DImode, ior);
44245 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
44246 const1_rtx, GEN_INT (3)));
44247 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
44249 return true;
44252 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
44253 and extract-odd permutations. */
44255 static bool
44256 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
44258 rtx t1, t2, t3, t4, t5;
44260 switch (d->vmode)
44262 case V4DFmode:
44263 if (d->testing_p)
44264 break;
44265 t1 = gen_reg_rtx (V4DFmode);
44266 t2 = gen_reg_rtx (V4DFmode);
44268 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
44269 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
44270 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
44272 /* Now an unpck[lh]pd will produce the result required. */
44273 if (odd)
44274 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
44275 else
44276 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
44277 emit_insn (t3);
44278 break;
44280 case V8SFmode:
44282 int mask = odd ? 0xdd : 0x88;
44284 if (d->testing_p)
44285 break;
44286 t1 = gen_reg_rtx (V8SFmode);
44287 t2 = gen_reg_rtx (V8SFmode);
44288 t3 = gen_reg_rtx (V8SFmode);
44290 /* Shuffle within the 128-bit lanes to produce:
44291 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
44292 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
44293 GEN_INT (mask)));
44295 /* Shuffle the lanes around to produce:
44296 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
44297 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
44298 GEN_INT (0x3)));
44300 /* Shuffle within the 128-bit lanes to produce:
44301 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
44302 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
44304 /* Shuffle within the 128-bit lanes to produce:
44305 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
44306 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
44308 /* Shuffle the lanes around to produce:
44309 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
44310 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
44311 GEN_INT (0x20)));
44313 break;
44315 case V2DFmode:
44316 case V4SFmode:
44317 case V2DImode:
44318 case V4SImode:
44319 /* These are always directly implementable by expand_vec_perm_1. */
44320 gcc_unreachable ();
44322 case V8HImode:
44323 if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
44324 return expand_vec_perm_pshufb2 (d);
44325 else
44327 if (d->testing_p)
44328 break;
44329 /* We need 2*log2(N)-1 operations to achieve odd/even
44330 with interleave. */
44331 t1 = gen_reg_rtx (V8HImode);
44332 t2 = gen_reg_rtx (V8HImode);
44333 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
44334 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
44335 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
44336 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
44337 if (odd)
44338 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
44339 else
44340 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
44341 emit_insn (t3);
44343 break;
44345 case V16QImode:
44346 if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
44347 return expand_vec_perm_pshufb2 (d);
44348 else
44350 if (d->testing_p)
44351 break;
44352 t1 = gen_reg_rtx (V16QImode);
44353 t2 = gen_reg_rtx (V16QImode);
44354 t3 = gen_reg_rtx (V16QImode);
44355 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
44356 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
44357 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
44358 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
44359 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
44360 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
44361 if (odd)
44362 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
44363 else
44364 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
44365 emit_insn (t3);
44367 break;
44369 case V16HImode:
44370 case V32QImode:
44371 return expand_vec_perm_vpshufb2_vpermq_even_odd (d);
44373 case V4DImode:
44374 if (!TARGET_AVX2)
44376 struct expand_vec_perm_d d_copy = *d;
44377 d_copy.vmode = V4DFmode;
44378 if (d->testing_p)
44379 d_copy.target = gen_lowpart (V4DFmode, d->target);
44380 else
44381 d_copy.target = gen_reg_rtx (V4DFmode);
44382 d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
44383 d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
44384 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
44386 if (!d->testing_p)
44387 emit_move_insn (d->target,
44388 gen_lowpart (V4DImode, d_copy.target));
44389 return true;
44391 return false;
44394 if (d->testing_p)
44395 break;
44397 t1 = gen_reg_rtx (V4DImode);
44398 t2 = gen_reg_rtx (V4DImode);
44400 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
44401 emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
44402 emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
44404 /* Now a vpunpck[lh]qdq will produce the result required. */
44405 if (odd)
44406 t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
44407 else
44408 t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
44409 emit_insn (t3);
44410 break;
44412 case V8SImode:
44413 if (!TARGET_AVX2)
44415 struct expand_vec_perm_d d_copy = *d;
44416 d_copy.vmode = V8SFmode;
44417 if (d->testing_p)
44418 d_copy.target = gen_lowpart (V8SFmode, d->target);
44419 else
44420 d_copy.target = gen_reg_rtx (V8SFmode);
44421 d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
44422 d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
44423 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
44425 if (!d->testing_p)
44426 emit_move_insn (d->target,
44427 gen_lowpart (V8SImode, d_copy.target));
44428 return true;
44430 return false;
44433 if (d->testing_p)
44434 break;
44436 t1 = gen_reg_rtx (V8SImode);
44437 t2 = gen_reg_rtx (V8SImode);
44438 t3 = gen_reg_rtx (V4DImode);
44439 t4 = gen_reg_rtx (V4DImode);
44440 t5 = gen_reg_rtx (V4DImode);
44442 /* Shuffle the lanes around into
44443 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
44444 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0),
44445 gen_lowpart (V4DImode, d->op1),
44446 GEN_INT (0x20)));
44447 emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0),
44448 gen_lowpart (V4DImode, d->op1),
44449 GEN_INT (0x31)));
44451 /* Swap the 2nd and 3rd position in each lane into
44452 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
44453 emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3),
44454 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
44455 emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4),
44456 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
44458 /* Now a vpunpck[lh]qdq will produce
44459 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
44460 if (odd)
44461 t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1),
44462 gen_lowpart (V4DImode, t2));
44463 else
44464 t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1),
44465 gen_lowpart (V4DImode, t2));
44466 emit_insn (t3);
44467 emit_move_insn (d->target, gen_lowpart (V8SImode, t5));
44468 break;
44470 default:
44471 gcc_unreachable ();
44474 return true;
44477 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
44478 extract-even and extract-odd permutations. */
44480 static bool
44481 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
44483 unsigned i, odd, nelt = d->nelt;
44485 odd = d->perm[0];
44486 if (odd != 0 && odd != 1)
44487 return false;
44489 for (i = 1; i < nelt; ++i)
44490 if (d->perm[i] != 2 * i + odd)
44491 return false;
44493 return expand_vec_perm_even_odd_1 (d, odd);
44496 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
44497 permutations. We assume that expand_vec_perm_1 has already failed. */
44499 static bool
44500 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
44502 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
44503 enum machine_mode vmode = d->vmode;
44504 unsigned char perm2[4];
44505 rtx op0 = d->op0, dest;
44506 bool ok;
44508 switch (vmode)
44510 case V4DFmode:
44511 case V8SFmode:
44512 /* These are special-cased in sse.md so that we can optionally
44513 use the vbroadcast instruction. They expand to two insns
44514 if the input happens to be in a register. */
44515 gcc_unreachable ();
44517 case V2DFmode:
44518 case V2DImode:
44519 case V4SFmode:
44520 case V4SImode:
44521 /* These are always implementable using standard shuffle patterns. */
44522 gcc_unreachable ();
44524 case V8HImode:
44525 case V16QImode:
44526 /* These can be implemented via interleave. We save one insn by
44527 stopping once we have promoted to V4SImode and then using pshufd. */
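/* Worked example, added for exposition and not part of the original source:
   broadcasting element 5 of a V16QImode vector first emits
   vec_interleave_lowv16qi, leaving two copies of byte 5 in halfword 5;
   the next round emits vec_interleave_highv8hi, leaving four copies in
   word 1; pshufd with { 1 1 1 1 } then replicates that word across the
   whole vector.  */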
44528 if (d->testing_p)
44529 return true;
44532 rtx dest;
44533 rtx (*gen) (rtx, rtx, rtx)
44534 = vmode == V16QImode ? gen_vec_interleave_lowv16qi
44535 : gen_vec_interleave_lowv8hi;
44537 if (elt >= nelt2)
44539 gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
44540 : gen_vec_interleave_highv8hi;
44541 elt -= nelt2;
44543 nelt2 /= 2;
44545 dest = gen_reg_rtx (vmode);
44546 emit_insn (gen (dest, op0, op0));
44547 vmode = get_mode_wider_vector (vmode);
44548 op0 = gen_lowpart (vmode, dest);
44550 while (vmode != V4SImode);
44552 memset (perm2, elt, 4);
44553 dest = gen_reg_rtx (V4SImode);
44554 ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
44555 gcc_assert (ok);
44556 if (!d->testing_p)
44557 emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
44558 return true;
44560 case V32QImode:
44561 case V16HImode:
44562 case V8SImode:
44563 case V4DImode:
44564 /* For AVX2, broadcasts of the first element should already have been
44565 handled by expand_vec_perm_1 via vpbroadcast* or vpermq. */
44566 gcc_assert (!TARGET_AVX2 || d->perm[0]);
44567 return false;
44569 default:
44570 gcc_unreachable ();
44574 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
44575 broadcast permutations. */
44577 static bool
44578 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
44580 unsigned i, elt, nelt = d->nelt;
44582 if (!d->one_operand_p)
44583 return false;
44585 elt = d->perm[0];
44586 for (i = 1; i < nelt; ++i)
44587 if (d->perm[i] != elt)
44588 return false;
44590 return expand_vec_perm_broadcast_1 (d);
44593 /* Implement arbitrary permutation of two V32QImode and V16QImode operands
44594 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
44595 all the shorter instruction sequences. */
44597 static bool
44598 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
44600 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
44601 unsigned int i, nelt, eltsz;
44602 bool used[4];
44604 if (!TARGET_AVX2
44605 || d->one_operand_p
44606 || (d->vmode != V32QImode && d->vmode != V16HImode))
44607 return false;
44609 if (d->testing_p)
44610 return true;
44612 nelt = d->nelt;
44613 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
44615 /* Generate 4 permutation masks. If the required element is within
44616 the same lane, it is shuffled in. If the required element is from the
44617 other lane, force a zero by setting bit 7 in the permutation mask.
44618 The other mask has non-negative elements where an element is
44619 requested from the other lane, but the element is also moved to the
44620 other lane, so that the result of vpshufb can have its two V2TImode
44621 halves swapped. */
44622 m128 = GEN_INT (-128);
44623 for (i = 0; i < 32; ++i)
44625 rperm[0][i] = m128;
44626 rperm[1][i] = m128;
44627 rperm[2][i] = m128;
44628 rperm[3][i] = m128;
44630 used[0] = false;
44631 used[1] = false;
44632 used[2] = false;
44633 used[3] = false;
44634 for (i = 0; i < nelt; ++i)
44636 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
44637 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
44638 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
44640 for (j = 0; j < eltsz; ++j)
44641 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
44642 used[which] = true;
44645 for (i = 0; i < 2; ++i)
44647 if (!used[2 * i + 1])
44649 h[i] = NULL_RTX;
44650 continue;
44652 vperm = gen_rtx_CONST_VECTOR (V32QImode,
44653 gen_rtvec_v (32, rperm[2 * i + 1]));
44654 vperm = force_reg (V32QImode, vperm);
44655 h[i] = gen_reg_rtx (V32QImode);
44656 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
44657 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
44660 /* Swap the 128-bit lanes of h[X]. */
44661 for (i = 0; i < 2; ++i)
44663 if (h[i] == NULL_RTX)
44664 continue;
44665 op = gen_reg_rtx (V4DImode);
44666 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
44667 const2_rtx, GEN_INT (3), const0_rtx,
44668 const1_rtx));
44669 h[i] = gen_lowpart (V32QImode, op);
44672 for (i = 0; i < 2; ++i)
44674 if (!used[2 * i])
44676 l[i] = NULL_RTX;
44677 continue;
44679 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
44680 vperm = force_reg (V32QImode, vperm);
44681 l[i] = gen_reg_rtx (V32QImode);
44682 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
44683 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
44686 for (i = 0; i < 2; ++i)
44688 if (h[i] && l[i])
44690 op = gen_reg_rtx (V32QImode);
44691 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
44692 l[i] = op;
44694 else if (h[i])
44695 l[i] = h[i];
44698 gcc_assert (l[0] && l[1]);
44699 op = d->target;
44700 if (d->vmode != V32QImode)
44701 op = gen_reg_rtx (V32QImode);
44702 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
44703 if (op != d->target)
44704 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
44705 return true;
44708 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
44709 With all of the interface bits taken care of, perform the expansion
44710 in D and return true on success. */
44712 static bool
44713 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
44715 /* Try a single instruction expansion. */
44716 if (expand_vec_perm_1 (d))
44717 return true;
44719 /* Try sequences of two instructions. */
44721 if (expand_vec_perm_pshuflw_pshufhw (d))
44722 return true;
44724 if (expand_vec_perm_palignr (d))
44725 return true;
44727 if (expand_vec_perm_interleave2 (d))
44728 return true;
44730 if (expand_vec_perm_broadcast (d))
44731 return true;
44733 if (expand_vec_perm_vpermq_perm_1 (d))
44734 return true;
44736 if (expand_vec_perm_vperm2f128 (d))
44737 return true;
44739 if (expand_vec_perm_pblendv (d))
44740 return true;
44742 /* Try sequences of three instructions. */
44744 if (expand_vec_perm_2vperm2f128_vshuf (d))
44745 return true;
44747 if (expand_vec_perm_pshufb2 (d))
44748 return true;
44750 if (expand_vec_perm_interleave3 (d))
44751 return true;
44753 if (expand_vec_perm_vperm2f128_vblend (d))
44754 return true;
44756 /* Try sequences of four instructions. */
44758 if (expand_vec_perm_vpshufb2_vpermq (d))
44759 return true;
44761 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
44762 return true;
44764 /* ??? Look for narrow permutations whose element orderings would
44765 allow the promotion to a wider mode. */
44767 /* ??? Look for sequences of interleave or a wider permute that place
44768 the data into the correct lanes for a half-vector shuffle like
44769 pshuf[lh]w or vpermilps. */
44771 /* ??? Look for sequences of interleave that produce the desired results.
44772 The combinatorics of punpck[lh] get pretty ugly... */
44774 if (expand_vec_perm_even_odd (d))
44775 return true;
44777 /* Even longer sequences. */
44778 if (expand_vec_perm_vpshufb4_vpermq2 (d))
44779 return true;
44781 return false;
44784 /* If a permutation only uses one operand, make it clear. Returns true
44785 if the permutation references both operands. */
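/* Illustrative example, added for exposition and not part of the original
   source: for V4SImode, a selector of { 5, 6, 4, 7 } references only the
   second operand (which == 2), so the indices are folded to { 1, 2, 0, 3 }
   and op0 is replaced by op1, leaving a one-operand permutation.  */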
44787 static bool
44788 canonicalize_perm (struct expand_vec_perm_d *d)
44790 int i, which, nelt = d->nelt;
44792 for (i = which = 0; i < nelt; ++i)
44793 which |= (d->perm[i] < nelt ? 1 : 2);
44795 d->one_operand_p = true;
44796 switch (which)
44798 default:
44799 gcc_unreachable();
44801 case 3:
44802 if (!rtx_equal_p (d->op0, d->op1))
44804 d->one_operand_p = false;
44805 break;
44807 /* The elements of PERM do not suggest that only the first operand
44808 is used, but both operands are identical. Allow easier matching
44809 of the permutation by folding the permutation into the single
44810 input vector. */
44811 /* FALLTHRU */
44813 case 2:
44814 for (i = 0; i < nelt; ++i)
44815 d->perm[i] &= nelt - 1;
44816 d->op0 = d->op1;
44817 break;
44819 case 1:
44820 d->op1 = d->op0;
44821 break;
44824 return (which == 3);
44827 bool
44828 ix86_expand_vec_perm_const (rtx operands[4])
44830 struct expand_vec_perm_d d;
44831 unsigned char perm[MAX_VECT_LEN];
44832 int i, nelt;
44833 bool two_args;
44834 rtx sel;
44836 d.target = operands[0];
44837 d.op0 = operands[1];
44838 d.op1 = operands[2];
44839 sel = operands[3];
44841 d.vmode = GET_MODE (d.target);
44842 gcc_assert (VECTOR_MODE_P (d.vmode));
44843 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44844 d.testing_p = false;
44846 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
44847 gcc_assert (XVECLEN (sel, 0) == nelt);
44848 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
44850 for (i = 0; i < nelt; ++i)
44852 rtx e = XVECEXP (sel, 0, i);
44853 int ei = INTVAL (e) & (2 * nelt - 1);
44854 d.perm[i] = ei;
44855 perm[i] = ei;
44858 two_args = canonicalize_perm (&d);
44860 if (ix86_expand_vec_perm_const_1 (&d))
44861 return true;
44863 /* If the selector says both arguments are needed, but the operands are the
44864 same, the above tried to expand with one_operand_p set and a flattened selector.
44865 If that didn't work, retry without one_operand_p; we succeeded with that
44866 during testing. */
44867 if (two_args && d.one_operand_p)
44869 d.one_operand_p = false;
44870 memcpy (d.perm, perm, sizeof (perm));
44871 return ix86_expand_vec_perm_const_1 (&d);
44874 return false;
44877 /* Implement targetm.vectorize.vec_perm_const_ok. */
44879 static bool
44880 ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode,
44881 const unsigned char *sel)
44883 struct expand_vec_perm_d d;
44884 unsigned int i, nelt, which;
44885 bool ret;
44887 d.vmode = vmode;
44888 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44889 d.testing_p = true;
44891 /* Given sufficient ISA support we can just return true here
44892 for selected vector modes. */
44893 if (d.vmode == V16SImode || d.vmode == V16SFmode
44894 || d.vmode == V8DFmode || d.vmode == V8DImode)
44895 /* All implementable with a single vpermi2 insn. */
44896 return true;
44897 if (GET_MODE_SIZE (d.vmode) == 16)
44899 /* All implementable with a single vpperm insn. */
44900 if (TARGET_XOP)
44901 return true;
44902 /* All implementable with 2 pshufb + 1 ior. */
44903 if (TARGET_SSSE3)
44904 return true;
44905 /* All implementable with shufpd or unpck[lh]pd. */
44906 if (d.nelt == 2)
44907 return true;
44910 /* Extract the values from the vector CST into the permutation
44911 array in D. */
44912 memcpy (d.perm, sel, nelt);
44913 for (i = which = 0; i < nelt; ++i)
44915 unsigned char e = d.perm[i];
44916 gcc_assert (e < 2 * nelt);
44917 which |= (e < nelt ? 1 : 2);
44920 /* For all elements from second vector, fold the elements to first. */
44921 if (which == 2)
44922 for (i = 0; i < nelt; ++i)
44923 d.perm[i] -= nelt;
44925 /* Check whether the mask can be applied to the vector type. */
44926 d.one_operand_p = (which != 3);
44928 /* Implementable with shufps or pshufd. */
44929 if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
44930 return true;
44932 /* Otherwise we have to go through the motions and see if we can
44933 figure out how to generate the requested permutation. */
44934 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
44935 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
44936 if (!d.one_operand_p)
44937 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
44939 start_sequence ();
44940 ret = ix86_expand_vec_perm_const_1 (&d);
44941 end_sequence ();
44943 return ret;
44946 void
44947 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
44949 struct expand_vec_perm_d d;
44950 unsigned i, nelt;
44952 d.target = targ;
44953 d.op0 = op0;
44954 d.op1 = op1;
44955 d.vmode = GET_MODE (targ);
44956 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44957 d.one_operand_p = false;
44958 d.testing_p = false;
44960 for (i = 0; i < nelt; ++i)
44961 d.perm[i] = i * 2 + odd;
44963 /* We'll either be able to implement the permutation directly... */
44964 if (expand_vec_perm_1 (&d))
44965 return;
44967 /* ... or we use the special-case patterns. */
44968 expand_vec_perm_even_odd_1 (&d, odd);
44971 static void
44972 ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
44974 struct expand_vec_perm_d d;
44975 unsigned i, nelt, base;
44976 bool ok;
44978 d.target = targ;
44979 d.op0 = op0;
44980 d.op1 = op1;
44981 d.vmode = GET_MODE (targ);
44982 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44983 d.one_operand_p = false;
44984 d.testing_p = false;
44986 base = high_p ? nelt / 2 : 0;
44987 for (i = 0; i < nelt / 2; ++i)
44989 d.perm[i * 2] = i + base;
44990 d.perm[i * 2 + 1] = i + base + nelt;
44993 /* Note that for AVX this isn't one instruction. */
44994 ok = ix86_expand_vec_perm_const_1 (&d);
44995 gcc_assert (ok);
44999 /* Expand a vector operation CODE for a V*QImode in terms of the
45000 same operation on V*HImode. */
45002 void
45003 ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
45005 enum machine_mode qimode = GET_MODE (dest);
45006 enum machine_mode himode;
45007 rtx (*gen_il) (rtx, rtx, rtx);
45008 rtx (*gen_ih) (rtx, rtx, rtx);
45009 rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
45010 struct expand_vec_perm_d d;
45011 bool ok, full_interleave;
45012 bool uns_p = false;
45013 int i;
45015 switch (qimode)
45017 case V16QImode:
45018 himode = V8HImode;
45019 gen_il = gen_vec_interleave_lowv16qi;
45020 gen_ih = gen_vec_interleave_highv16qi;
45021 break;
45022 case V32QImode:
45023 himode = V16HImode;
45024 gen_il = gen_avx2_interleave_lowv32qi;
45025 gen_ih = gen_avx2_interleave_highv32qi;
45026 break;
45027 default:
45028 gcc_unreachable ();
45031 op2_l = op2_h = op2;
45032 switch (code)
45034 case MULT:
45035 /* Unpack data such that we've got a source byte in each low byte of
45036 each word. We don't care what goes into the high byte of each word.
45037 Rather than trying to get zero in there, it is most convenient to
45038 let it be a copy of the low byte. */
45039 op2_l = gen_reg_rtx (qimode);
45040 op2_h = gen_reg_rtx (qimode);
45041 emit_insn (gen_il (op2_l, op2, op2));
45042 emit_insn (gen_ih (op2_h, op2, op2));
45043 /* FALLTHRU */
45045 op1_l = gen_reg_rtx (qimode);
45046 op1_h = gen_reg_rtx (qimode);
45047 emit_insn (gen_il (op1_l, op1, op1));
45048 emit_insn (gen_ih (op1_h, op1, op1));
45049 full_interleave = qimode == V16QImode;
45050 break;
45052 case ASHIFT:
45053 case LSHIFTRT:
45054 uns_p = true;
45055 /* FALLTHRU */
45056 case ASHIFTRT:
45057 op1_l = gen_reg_rtx (himode);
45058 op1_h = gen_reg_rtx (himode);
45059 ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
45060 ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
45061 full_interleave = true;
45062 break;
45063 default:
45064 gcc_unreachable ();
45067 /* Perform the operation. */
45068 res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
45069 1, OPTAB_DIRECT);
45070 res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
45071 1, OPTAB_DIRECT);
45072 gcc_assert (res_l && res_h);
45074 /* Merge the data back into the right place. */
45075 d.target = dest;
45076 d.op0 = gen_lowpart (qimode, res_l);
45077 d.op1 = gen_lowpart (qimode, res_h);
45078 d.vmode = qimode;
45079 d.nelt = GET_MODE_NUNITS (qimode);
45080 d.one_operand_p = false;
45081 d.testing_p = false;
45083 if (full_interleave)
45085 /* For SSE2, we used a full interleave, so the desired
45086 results are in the even elements. */
45087 for (i = 0; i < 32; ++i)
45088 d.perm[i] = i * 2;
45090 else
45092 /* For AVX, the interleave used above was not cross-lane. So the
45093 extraction is of the even elements, but with the second and third quarters
45094 swapped. Happily, that is even one insn shorter than even extraction. */
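/* Worked example, added for exposition and not part of the original source:
   for V32QImode the formula below yields
     d.perm = { 0 2 .. 14  32 34 .. 46  16 18 .. 30  48 50 .. 62 }
   i.e. the four quarters are taken alternately from res_l and res_h
   (indices 32-63 refer to the second operand).  */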
45095 for (i = 0; i < 32; ++i)
45096 d.perm[i] = i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0);
45099 ok = ix86_expand_vec_perm_const_1 (&d);
45100 gcc_assert (ok);
45102 set_unique_reg_note (get_last_insn (), REG_EQUAL,
45103 gen_rtx_fmt_ee (code, qimode, op1, op2));
45106 /* Helper function of ix86_expand_mul_widen_evenodd. Return true
45107 if op is CONST_VECTOR with all odd elements equal to their
45108 preceding element. */
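/* Illustrative example, added for exposition and not part of the original
   source: for V4SImode, { 3, 3, 7, 7 } satisfies this predicate, while
   { 3, 4, 7, 7 } does not because element 1 differs from element 0.  */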
45110 static bool
45111 const_vector_equal_evenodd_p (rtx op)
45113 enum machine_mode mode = GET_MODE (op);
45114 int i, nunits = GET_MODE_NUNITS (mode);
45115 if (GET_CODE (op) != CONST_VECTOR
45116 || nunits != CONST_VECTOR_NUNITS (op))
45117 return false;
45118 for (i = 0; i < nunits; i += 2)
45119 if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
45120 return false;
45121 return true;
45124 void
45125 ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
45126 bool uns_p, bool odd_p)
45128 enum machine_mode mode = GET_MODE (op1);
45129 enum machine_mode wmode = GET_MODE (dest);
45130 rtx x;
45131 rtx orig_op1 = op1, orig_op2 = op2;
45133 if (!nonimmediate_operand (op1, mode))
45134 op1 = force_reg (mode, op1);
45135 if (!nonimmediate_operand (op2, mode))
45136 op2 = force_reg (mode, op2);
45138 /* We only play even/odd games with vectors of SImode. */
45139 gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode);
45141 /* If we're looking for the odd results, shift those members down to
45142 the even slots. For some CPUs this is faster than a PSHUFD. */
45143 if (odd_p)
45145 /* For XOP use vpmacsdqh, but only for smult, as it is only
45146 signed. */
45147 if (TARGET_XOP && mode == V4SImode && !uns_p)
45149 x = force_reg (wmode, CONST0_RTX (wmode));
45150 emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
45151 return;
45154 x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
45155 if (!const_vector_equal_evenodd_p (orig_op1))
45156 op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
45157 x, NULL, 1, OPTAB_DIRECT);
45158 if (!const_vector_equal_evenodd_p (orig_op2))
45159 op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
45160 x, NULL, 1, OPTAB_DIRECT);
45161 op1 = gen_lowpart (mode, op1);
45162 op2 = gen_lowpart (mode, op2);
45165 if (mode == V16SImode)
45167 if (uns_p)
45168 x = gen_vec_widen_umult_even_v16si (dest, op1, op2);
45169 else
45170 x = gen_vec_widen_smult_even_v16si (dest, op1, op2);
45172 else if (mode == V8SImode)
45174 if (uns_p)
45175 x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
45176 else
45177 x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
45179 else if (uns_p)
45180 x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
45181 else if (TARGET_SSE4_1)
45182 x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
45183 else
45185 rtx s1, s2, t0, t1, t2;
45187 /* The easiest way to implement this without PMULDQ is to go through
45188 the motions as if we were performing a full 64-bit multiply, with
45189 the exception that we need to do less shuffling of the elements. */
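/* Numeric sanity check of the sequence below, added for exposition and not
   part of the original source: for one lane with op1 = -3 and op2 = 5,
     t0 = (2^32 - 3) * 5 = 5*2^32 - 15,  s1 = all-ones,  s2 = 0,
     t1 + t2 = (2^32 - 1) * 5,  ((t1 + t2) << 32) mod 2^64 = -5*2^32,
   and the final sum is -15 mod 2^64, the correct signed widening product
   of -3 and 5.  */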
45191 /* Compute the sign-extension, aka highparts, of the two operands. */
45192 s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
45193 op1, pc_rtx, pc_rtx);
45194 s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
45195 op2, pc_rtx, pc_rtx);
45197 /* Multiply LO(A) * HI(B), and vice-versa. */
45198 t1 = gen_reg_rtx (wmode);
45199 t2 = gen_reg_rtx (wmode);
45200 emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
45201 emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));
45203 /* Multiply LO(A) * LO(B). */
45204 t0 = gen_reg_rtx (wmode);
45205 emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));
45207 /* Combine and shift the highparts into place. */
45208 t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
45209 t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
45210 1, OPTAB_DIRECT);
45212 /* Combine high and low parts. */
45213 force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
45214 return;
45216 emit_insn (x);
45219 void
45220 ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
45221 bool uns_p, bool high_p)
45223 enum machine_mode wmode = GET_MODE (dest);
45224 enum machine_mode mode = GET_MODE (op1);
45225 rtx t1, t2, t3, t4, mask;
45227 switch (mode)
45229 case V4SImode:
45230 t1 = gen_reg_rtx (mode);
45231 t2 = gen_reg_rtx (mode);
45232 if (TARGET_XOP && !uns_p)
45234 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
45235 shuffle the elements once so that all elements are in the right
45236 place for immediate use: { A C B D }. */
45237 emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
45238 const1_rtx, GEN_INT (3)));
45239 emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
45240 const1_rtx, GEN_INT (3)));
45242 else
45244 /* Put the elements into place for the multiply. */
45245 ix86_expand_vec_interleave (t1, op1, op1, high_p);
45246 ix86_expand_vec_interleave (t2, op2, op2, high_p);
45247 high_p = false;
45249 ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
45250 break;
45252 case V8SImode:
45253 /* Shuffle the elements between the lanes. After this we
45254 have { A B E F | C D G H } for each operand. */
45255 t1 = gen_reg_rtx (V4DImode);
45256 t2 = gen_reg_rtx (V4DImode);
45257 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
45258 const0_rtx, const2_rtx,
45259 const1_rtx, GEN_INT (3)));
45260 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
45261 const0_rtx, const2_rtx,
45262 const1_rtx, GEN_INT (3)));
45264 /* Shuffle the elements within the lanes. After this we
45265 have { A A B B | C C D D } or { E E F F | G G H H }. */
45266 t3 = gen_reg_rtx (V8SImode);
45267 t4 = gen_reg_rtx (V8SImode);
45268 mask = GEN_INT (high_p
45269 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
45270 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
45271 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
45272 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));
45274 ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
45275 break;
45277 case V8HImode:
45278 case V16HImode:
45279 t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
45280 uns_p, OPTAB_DIRECT);
45281 t2 = expand_binop (mode,
45282 uns_p ? umul_highpart_optab : smul_highpart_optab,
45283 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
45284 gcc_assert (t1 && t2);
45286 t3 = gen_reg_rtx (mode);
45287 ix86_expand_vec_interleave (t3, t1, t2, high_p);
45288 emit_move_insn (dest, gen_lowpart (wmode, t3));
45289 break;
45291 case V16QImode:
45292 case V32QImode:
45293 t1 = gen_reg_rtx (wmode);
45294 t2 = gen_reg_rtx (wmode);
45295 ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
45296 ix86_expand_sse_unpack (t2, op2, uns_p, high_p);
45298 emit_insn (gen_rtx_SET (VOIDmode, dest, gen_rtx_MULT (wmode, t1, t2)));
45299 break;
45301 default:
45302 gcc_unreachable ();
45306 void
45307 ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
45309 rtx res_1, res_2, res_3, res_4;
45311 res_1 = gen_reg_rtx (V4SImode);
45312 res_2 = gen_reg_rtx (V4SImode);
45313 res_3 = gen_reg_rtx (V2DImode);
45314 res_4 = gen_reg_rtx (V2DImode);
45315 ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
45316 ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);
45318 /* Move the results in element 2 down to element 1; we don't care
45319 what goes in elements 2 and 3. Then we can merge the parts
45320 back together with an interleave.
45322 Note that two other sequences were tried:
45323 (1) Use interleaves at the start instead of psrldq, which allows
45324 us to use a single shufps to merge things back at the end.
45325 (2) Use shufps here to combine the two vectors, then pshufd to
45326 put the elements in the correct order.
45327 In both cases the cost of the reformatting stall was too high
45328 and the overall sequence slower. */
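/* Added for exposition, not part of the original source: res_3 holds the
   64-bit products p0 and p2 of elements 0 and 2, res_4 holds p1 and p3 of
   elements 1 and 3.  The pshufd selector { 0, 2, 0, 0 } moves the low
   32 bits of each product into elements 0 and 1, so the final
   interleave-low produces { lo(p0), lo(p1), lo(p2), lo(p3) }, the desired
   V4SImode result.  */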
45330 emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
45331 const0_rtx, const2_rtx,
45332 const0_rtx, const0_rtx));
45333 emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
45334 const0_rtx, const2_rtx,
45335 const0_rtx, const0_rtx));
45336 res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));
45338 set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
45341 void
45342 ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
45344 enum machine_mode mode = GET_MODE (op0);
45345 rtx t1, t2, t3, t4, t5, t6;
45347 if (TARGET_XOP && mode == V2DImode)
45349 /* op1: A,B,C,D, op2: E,F,G,H */
45350 op1 = gen_lowpart (V4SImode, op1);
45351 op2 = gen_lowpart (V4SImode, op2);
45353 t1 = gen_reg_rtx (V4SImode);
45354 t2 = gen_reg_rtx (V4SImode);
45355 t3 = gen_reg_rtx (V2DImode);
45356 t4 = gen_reg_rtx (V2DImode);
45358 /* t1: B,A,D,C */
45359 emit_insn (gen_sse2_pshufd_1 (t1, op1,
45360 GEN_INT (1),
45361 GEN_INT (0),
45362 GEN_INT (3),
45363 GEN_INT (2)));
45365 /* t2: (B*E),(A*F),(D*G),(C*H) */
45366 emit_insn (gen_mulv4si3 (t2, t1, op2));
45368 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
45369 emit_insn (gen_xop_phadddq (t3, t2));
45371 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
45372 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
45374 /* Multiply lower parts and add all */
45375 t5 = gen_reg_rtx (V2DImode);
45376 emit_insn (gen_vec_widen_umult_even_v4si (t5,
45377 gen_lowpart (V4SImode, op1),
45378 gen_lowpart (V4SImode, op2)));
45379 op0 = expand_binop (mode, add_optab, t5, t4, op0, 1, OPTAB_DIRECT);
45382 else
45384 enum machine_mode nmode;
45385 rtx (*umul) (rtx, rtx, rtx);
45387 if (mode == V2DImode)
45389 umul = gen_vec_widen_umult_even_v4si;
45390 nmode = V4SImode;
45392 else if (mode == V4DImode)
45394 umul = gen_vec_widen_umult_even_v8si;
45395 nmode = V8SImode;
45397 else if (mode == V8DImode)
45399 umul = gen_vec_widen_umult_even_v16si;
45400 nmode = V16SImode;
45402 else
45403 gcc_unreachable ();
45406 /* Multiply low parts. */
45407 t1 = gen_reg_rtx (mode);
45408 emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));
45410 /* Shift input vectors right 32 bits so we can multiply high parts. */
45411 t6 = GEN_INT (32);
45412 t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
45413 t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);
45415 /* Multiply high parts by low parts. */
45416 t4 = gen_reg_rtx (mode);
45417 t5 = gen_reg_rtx (mode);
45418 emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
45419 emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));
45421 /* Combine and shift the highparts back. */
45422 t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
45423 t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);
45425 /* Combine high and low parts. */
45426 force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
45429 set_unique_reg_note (get_last_insn (), REG_EQUAL,
45430 gen_rtx_MULT (mode, op1, op2));
45433 /* Calculate integer abs() using only SSE2 instructions. */
45435 void
45436 ix86_expand_sse2_abs (rtx target, rtx input)
45438 enum machine_mode mode = GET_MODE (target);
45439 rtx tmp0, tmp1, x;
45441 switch (mode)
45443 /* For 32-bit signed integer X, the best way to calculate the absolute
45444 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). */
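/* Worked example, added for exposition and not part of the original source:
   for X = -5 and W = 32, X >> 31 = -1 (all ones), (-1 ^ -5) = 4, and
   4 - (-1) = 5 = abs (-5).  */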
45445 case V4SImode:
45446 tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
45447 GEN_INT (GET_MODE_BITSIZE
45448 (GET_MODE_INNER (mode)) - 1),
45449 NULL, 0, OPTAB_DIRECT);
45450 tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
45451 NULL, 0, OPTAB_DIRECT);
45452 x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
45453 target, 0, OPTAB_DIRECT);
45454 break;
45456 /* For 16-bit signed integer X, the best way to calculate the absolute
45457 value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */
45458 case V8HImode:
45459 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
45461 x = expand_simple_binop (mode, SMAX, tmp0, input,
45462 target, 0, OPTAB_DIRECT);
45463 break;
45465 /* For 8-bit signed integer X, the best way to calculate the absolute
45466 value of X is min ((unsigned char) X, (unsigned char) (-X)),
45467 as SSE2 provides the PMINUB insn. */
45468 case V16QImode:
45469 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
45471 x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
45472 target, 0, OPTAB_DIRECT);
45473 break;
45475 default:
45476 gcc_unreachable ();
45479 if (x != target)
45480 emit_move_insn (target, x);
45483 /* Expand an insert into a vector register through pinsr insn.
45484 Return true if successful. */
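/* Illustrative example, added for exposition and not part of the original
   source: inserting an SImode value at bit position 64 of a V4SImode
   destination gives srcmode == SImode, dstmode == V4SImode and
   pos / size == 2, so gen_sse4_1_pinsrd is emitted with the immediate
   GEN_INT (1 << 2).  */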
45486 bool
45487 ix86_expand_pinsr (rtx *operands)
45489 rtx dst = operands[0];
45490 rtx src = operands[3];
45492 unsigned int size = INTVAL (operands[1]);
45493 unsigned int pos = INTVAL (operands[2]);
45495 if (GET_CODE (dst) == SUBREG)
45497 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
45498 dst = SUBREG_REG (dst);
45501 if (GET_CODE (src) == SUBREG)
45502 src = SUBREG_REG (src);
45504 switch (GET_MODE (dst))
45506 case V16QImode:
45507 case V8HImode:
45508 case V4SImode:
45509 case V2DImode:
45511 enum machine_mode srcmode, dstmode;
45512 rtx (*pinsr)(rtx, rtx, rtx, rtx);
45514 srcmode = mode_for_size (size, MODE_INT, 0);
45516 switch (srcmode)
45518 case QImode:
45519 if (!TARGET_SSE4_1)
45520 return false;
45521 dstmode = V16QImode;
45522 pinsr = gen_sse4_1_pinsrb;
45523 break;
45525 case HImode:
45526 if (!TARGET_SSE2)
45527 return false;
45528 dstmode = V8HImode;
45529 pinsr = gen_sse2_pinsrw;
45530 break;
45532 case SImode:
45533 if (!TARGET_SSE4_1)
45534 return false;
45535 dstmode = V4SImode;
45536 pinsr = gen_sse4_1_pinsrd;
45537 break;
45539 case DImode:
45540 gcc_assert (TARGET_64BIT);
45541 if (!TARGET_SSE4_1)
45542 return false;
45543 dstmode = V2DImode;
45544 pinsr = gen_sse4_1_pinsrq;
45545 break;
45547 default:
45548 return false;
45551 rtx d = dst;
45552 if (GET_MODE (dst) != dstmode)
45553 d = gen_reg_rtx (dstmode);
45554 src = gen_lowpart (srcmode, src);
45556 pos /= size;
45558 emit_insn (pinsr (d, gen_lowpart (dstmode, dst), src,
45559 GEN_INT (1 << pos)));
45560 if (d != dst)
45561 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
45562 return true;
45565 default:
45566 return false;
45570 /* This function returns the calling-ABI-specific va_list type node.
45571 It returns the FNDECL-specific va_list type. */
45573 static tree
45574 ix86_fn_abi_va_list (tree fndecl)
45576 if (!TARGET_64BIT)
45577 return va_list_type_node;
45578 gcc_assert (fndecl != NULL_TREE);
45580 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
45581 return ms_va_list_type_node;
45582 else
45583 return sysv_va_list_type_node;
45586 /* Returns the canonical va_list type specified by TYPE. If there
45587 is no valid TYPE provided, it returns NULL_TREE. */
45589 static tree
45590 ix86_canonical_va_list_type (tree type)
45592 tree wtype, htype;
45594 /* Resolve references and pointers to va_list type. */
45595 if (TREE_CODE (type) == MEM_REF)
45596 type = TREE_TYPE (type);
45597 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
45598 type = TREE_TYPE (type);
45599 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
45600 type = TREE_TYPE (type);
45602 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
45604 wtype = va_list_type_node;
45605 gcc_assert (wtype != NULL_TREE);
45606 htype = type;
45607 if (TREE_CODE (wtype) == ARRAY_TYPE)
45609 /* If va_list is an array type, the argument may have decayed
45610 to a pointer type, e.g. by being passed to another function.
45611 In that case, unwrap both types so that we can compare the
45612 underlying records. */
45613 if (TREE_CODE (htype) == ARRAY_TYPE
45614 || POINTER_TYPE_P (htype))
45616 wtype = TREE_TYPE (wtype);
45617 htype = TREE_TYPE (htype);
45620 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
45621 return va_list_type_node;
45622 wtype = sysv_va_list_type_node;
45623 gcc_assert (wtype != NULL_TREE);
45624 htype = type;
45625 if (TREE_CODE (wtype) == ARRAY_TYPE)
45627 /* If va_list is an array type, the argument may have decayed
45628 to a pointer type, e.g. by being passed to another function.
45629 In that case, unwrap both types so that we can compare the
45630 underlying records. */
45631 if (TREE_CODE (htype) == ARRAY_TYPE
45632 || POINTER_TYPE_P (htype))
45634 wtype = TREE_TYPE (wtype);
45635 htype = TREE_TYPE (htype);
45638 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
45639 return sysv_va_list_type_node;
45640 wtype = ms_va_list_type_node;
45641 gcc_assert (wtype != NULL_TREE);
45642 htype = type;
45643 if (TREE_CODE (wtype) == ARRAY_TYPE)
45645 /* If va_list is an array type, the argument may have decayed
45646 to a pointer type, e.g. by being passed to another function.
45647 In that case, unwrap both types so that we can compare the
45648 underlying records. */
45649 if (TREE_CODE (htype) == ARRAY_TYPE
45650 || POINTER_TYPE_P (htype))
45652 wtype = TREE_TYPE (wtype);
45653 htype = TREE_TYPE (htype);
45656 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
45657 return ms_va_list_type_node;
45658 return NULL_TREE;
45660 return std_canonical_va_list_type (type);
45663 /* Iterate through the target-specific builtin types for va_list.
45664 IDX denotes the iterator, *PTREE is set to the result type of
45665 the va_list builtin, and *PNAME to its internal type.
45666 Returns zero if there is no element for this index, otherwise
45667 IDX should be increased upon the next call.
45668 Note, do not iterate a base builtin's name like __builtin_va_list.
45669 Used from c_common_nodes_and_builtins. */
45671 static int
45672 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
45674 if (TARGET_64BIT)
45676 switch (idx)
45678 default:
45679 break;
45681 case 0:
45682 *ptree = ms_va_list_type_node;
45683 *pname = "__builtin_ms_va_list";
45684 return 1;
45686 case 1:
45687 *ptree = sysv_va_list_type_node;
45688 *pname = "__builtin_sysv_va_list";
45689 return 1;
45693 return 0;
45696 #undef TARGET_SCHED_DISPATCH
45697 #define TARGET_SCHED_DISPATCH has_dispatch
45698 #undef TARGET_SCHED_DISPATCH_DO
45699 #define TARGET_SCHED_DISPATCH_DO do_dispatch
45700 #undef TARGET_SCHED_REASSOCIATION_WIDTH
45701 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
45702 #undef TARGET_SCHED_REORDER
45703 #define TARGET_SCHED_REORDER ix86_sched_reorder
45704 #undef TARGET_SCHED_ADJUST_PRIORITY
45705 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
45706 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
45707 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
45708 ix86_dependencies_evaluation_hook
45710 /* The size of the dispatch window is the total number of bytes of
45711 object code allowed in a window. */
45712 #define DISPATCH_WINDOW_SIZE 16
45714 /* Number of dispatch windows considered for scheduling. */
45715 #define MAX_DISPATCH_WINDOWS 3
45717 /* Maximum number of instructions in a window. */
45718 #define MAX_INSN 4
45720 /* Maximum number of immediate operands in a window. */
45721 #define MAX_IMM 4
45723 /* Maximum number of immediate bits allowed in a window. */
45724 #define MAX_IMM_SIZE 128
45726 /* Maximum number of 32 bit immediates allowed in a window. */
45727 #define MAX_IMM_32 4
45729 /* Maximum number of 64 bit immediates allowed in a window. */
45730 #define MAX_IMM_64 2
45732 /* Maximum total of loads or prefetches allowed in a window. */
45733 #define MAX_LOAD 2
45735 /* Maximum total of stores allowed in a window. */
45736 #define MAX_STORE 1
45738 #undef BIG
45739 #define BIG 100
45742 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
45743 enum dispatch_group {
45744 disp_no_group = 0,
45745 disp_load,
45746 disp_store,
45747 disp_load_store,
45748 disp_prefetch,
45749 disp_imm,
45750 disp_imm_32,
45751 disp_imm_64,
45752 disp_branch,
45753 disp_cmp,
45754 disp_jcc,
45755 disp_last
45758 /* Number of allowable groups in a dispatch window. It is an array
45759 indexed by the dispatch_group enum. 100 is used as a big number,
45760 because the number of these kinds of operations does not have any
45761 effect in a dispatch window, but we need them for other reasons in
45762 the table. */
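/* Added for exposition, not part of the original source: the initializer
   below pairs up with the dispatch_group enum, i.e. at most 2 loads,
   1 store, 1 load-store, 2 prefetches, 4 immediates, 4 32-bit immediates,
   2 64-bit immediates and 1 branch per window; compares and jcc use the
   BIG placeholder because they do not constrain the window.  */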
45763 static unsigned int num_allowable_groups[disp_last] = {
45764 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
45767 char group_name[disp_last + 1][16] = {
45768 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
45769 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
45770 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
45773 /* Instruction path. */
45774 enum insn_path {
45775 no_path = 0,
45776 path_single, /* Single micro op. */
45777 path_double, /* Double micro op. */
45778 path_multi, /* Instructions with more than 2 micro ops. */
45779 last_path
45782 /* sched_insn_info defines a window into the instructions scheduled in
45783 the basic block. It contains a pointer to the insn_info table and
45784 the instruction scheduled.
45786 Windows are allocated for each basic block and are linked
45787 together. */
45788 typedef struct sched_insn_info_s {
45789 rtx insn;
45790 enum dispatch_group group;
45791 enum insn_path path;
45792 int byte_len;
45793 int imm_bytes;
45794 } sched_insn_info;
45796 /* Linked list of dispatch windows. This is a two way list of
45797 dispatch windows of a basic block. It contains information about
45798 the number of uops in the window and the total number of
45799 instructions and of bytes in the object code for this dispatch
45800 window. */
45801 typedef struct dispatch_windows_s {
45802 int num_insn; /* Number of insn in the window. */
45803 int num_uops; /* Number of uops in the window. */
45804 int window_size; /* Number of bytes in the window. */
45805 int window_num; /* Window number, either 0 or 1. */
45806 int num_imm; /* Number of immediates in an insn. */
45807 int num_imm_32; /* Number of 32 bit immediates in an insn. */
45808 int num_imm_64; /* Number of 64 bit immediates in an insn. */
45809 int imm_size; /* Total immediates in the window. */
45810 int num_loads; /* Total memory loads in the window. */
45811 int num_stores; /* Total memory stores in the window. */
45812 int violation; /* Violation exists in window. */
45813 sched_insn_info *window; /* Pointer to the window. */
45814 struct dispatch_windows_s *next;
45815 struct dispatch_windows_s *prev;
45816 } dispatch_windows;
45818 /* Immediate values used in an insn. */
45819 typedef struct imm_info_s
45821 int imm;
45822 int imm32;
45823 int imm64;
45824 } imm_info;
45826 static dispatch_windows *dispatch_window_list;
45827 static dispatch_windows *dispatch_window_list1;
45829 /* Get dispatch group of insn. */
45831 static enum dispatch_group
45832 get_mem_group (rtx insn)
45834 enum attr_memory memory;
45836 if (INSN_CODE (insn) < 0)
45837 return disp_no_group;
45838 memory = get_attr_memory (insn);
45839 if (memory == MEMORY_STORE)
45840 return disp_store;
45842 if (memory == MEMORY_LOAD)
45843 return disp_load;
45845 if (memory == MEMORY_BOTH)
45846 return disp_load_store;
45848 return disp_no_group;
45851 /* Return true if insn is a compare instruction. */
45853 static bool
45854 is_cmp (rtx insn)
45856 enum attr_type type;
45858 type = get_attr_type (insn);
45859 return (type == TYPE_TEST
45860 || type == TYPE_ICMP
45861 || type == TYPE_FCMP
45862 || GET_CODE (PATTERN (insn)) == COMPARE);
45865 /* Return true if a dispatch violation was encountered. */
45867 static bool
45868 dispatch_violation (void)
45870 if (dispatch_window_list->next)
45871 return dispatch_window_list->next->violation;
45872 return dispatch_window_list->violation;
45875 /* Return true if insn is a branch instruction. */
45877 static bool
45878 is_branch (rtx insn)
45880 return (CALL_P (insn) || JUMP_P (insn));
45883 /* Return true if insn is a prefetch instruction. */
45885 static bool
45886 is_prefetch (rtx insn)
45888 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
45891 /* This function initializes a dispatch window and the list container holding a
45892 pointer to the window. */
45894 static void
45895 init_window (int window_num)
45897 int i;
45898 dispatch_windows *new_list;
45900 if (window_num == 0)
45901 new_list = dispatch_window_list;
45902 else
45903 new_list = dispatch_window_list1;
45905 new_list->num_insn = 0;
45906 new_list->num_uops = 0;
45907 new_list->window_size = 0;
45908 new_list->next = NULL;
45909 new_list->prev = NULL;
45910 new_list->window_num = window_num;
45911 new_list->num_imm = 0;
45912 new_list->num_imm_32 = 0;
45913 new_list->num_imm_64 = 0;
45914 new_list->imm_size = 0;
45915 new_list->num_loads = 0;
45916 new_list->num_stores = 0;
45917 new_list->violation = false;
45919 for (i = 0; i < MAX_INSN; i++)
45921 new_list->window[i].insn = NULL;
45922 new_list->window[i].group = disp_no_group;
45923 new_list->window[i].path = no_path;
45924 new_list->window[i].byte_len = 0;
45925 new_list->window[i].imm_bytes = 0;
45927 return;
45930 /* This function allocates and initializes a dispatch window and the
45931 list container holding a pointer to the window. */
45933 static dispatch_windows *
45934 allocate_window (void)
45936 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
45937 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
45939 return new_list;
45942 /* This routine initializes the dispatch scheduling information. It
45943 initiates building dispatch scheduler tables and constructs the
45944 first dispatch window. */
45946 static void
45947 init_dispatch_sched (void)
45949 /* Allocate a dispatch list and a window. */
45950 dispatch_window_list = allocate_window ();
45951 dispatch_window_list1 = allocate_window ();
45952 init_window (0);
45953 init_window (1);
45956 /* This function returns true if a branch is detected. End of a basic block
45957 does not have to be a branch, but here we assume only branches end a
45958 window. */
45960 static bool
45961 is_end_basic_block (enum dispatch_group group)
45963 return group == disp_branch;
45966 /* This function is called when the end of a window processing is reached. */
45968 static void
45969 process_end_window (void)
45971 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
45972 if (dispatch_window_list->next)
45974 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
45975 gcc_assert (dispatch_window_list->window_size
45976 + dispatch_window_list1->window_size <= 48);
45977 init_window (1);
45979 init_window (0);
45982 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
45983 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
45984 for 48 bytes of instructions. Note that these windows are not dispatch
45985 windows whose sizes are DISPATCH_WINDOW_SIZE. */
45987 static dispatch_windows *
45988 allocate_next_window (int window_num)
45990 if (window_num == 0)
45992 if (dispatch_window_list->next)
45993 init_window (1);
45994 init_window (0);
45995 return dispatch_window_list;
45998 dispatch_window_list->next = dispatch_window_list1;
45999 dispatch_window_list1->prev = dispatch_window_list;
46001 return dispatch_window_list1;
46004 /* Increment the number of immediate operands of an instruction. */
46006 static int
46007 find_constant_1 (rtx *in_rtx, imm_info *imm_values)
46009 if (*in_rtx == 0)
46010 return 0;
46012 switch (GET_CODE (*in_rtx))
46014 case CONST:
46015 case SYMBOL_REF:
46016 case CONST_INT:
46017 (imm_values->imm)++;
46018 if (x86_64_immediate_operand (*in_rtx, SImode))
46019 (imm_values->imm32)++;
46020 else
46021 (imm_values->imm64)++;
46022 break;
46024 case CONST_DOUBLE:
46025 (imm_values->imm)++;
46026 (imm_values->imm64)++;
46027 break;
46029 case CODE_LABEL:
46030 if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
46032 (imm_values->imm)++;
46033 (imm_values->imm32)++;
46035 break;
46037 default:
46038 break;
46041 return 0;
46044 /* Compute number of immediate operands of an instruction. */
46046 static void
46047 find_constant (rtx in_rtx, imm_info *imm_values)
46049 for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
46050 (rtx_function) find_constant_1, (void *) imm_values);
46053 /* Return the total size of the immediate operands of an instruction, along
46054 with the number of corresponding immediate operands. It initializes its
46055 parameters to zero before calling FIND_CONSTANT.
46056 INSN is the input instruction. IMM is the total of immediates.
46057 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
46058 bit immediates. */
46060 static int
46061 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
46063 imm_info imm_values = {0, 0, 0};
46065 find_constant (insn, &imm_values);
46066 *imm = imm_values.imm;
46067 *imm32 = imm_values.imm32;
46068 *imm64 = imm_values.imm64;
46069 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
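
/* Illustrative sketch, not part of this file: the immediate-size accounting
   above, reduced to plain integers.  A hypothetical insn with one 32-bit and
   one 64-bit immediate contributes 1*4 + 1*8 = 12 bytes to the dispatch
   window's imm_size.  The helper name is made up for the example.  */
#if 0
static int
example_imm_bytes (int num_imm32, int num_imm64)
{
  /* Same formula as the return value of get_num_immediates above.  */
  return num_imm32 * 4 + num_imm64 * 8;
}
#endif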
46072 /* This function indicates whether an instruction has an immediate
46073 operand. */
46075 static bool
46076 has_immediate (rtx insn)
46078 int num_imm_operand;
46079 int num_imm32_operand;
46080 int num_imm64_operand;
46082 if (insn)
46083 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
46084 &num_imm64_operand);
46085 return false;
46088 /* Return single or double path for instructions. */
46090 static enum insn_path
46091 get_insn_path (rtx insn)
46093 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
46095 if ((int)path == 0)
46096 return path_single;
46098 if ((int)path == 1)
46099 return path_double;
46101 return path_multi;
46104 /* Return insn dispatch group. */
46106 static enum dispatch_group
46107 get_insn_group (rtx insn)
46109 enum dispatch_group group = get_mem_group (insn);
46110 if (group)
46111 return group;
46113 if (is_branch (insn))
46114 return disp_branch;
46116 if (is_cmp (insn))
46117 return disp_cmp;
46119 if (has_immediate (insn))
46120 return disp_imm;
46122 if (is_prefetch (insn))
46123 return disp_prefetch;
46125 return disp_no_group;
46128 /* Count number of GROUP restricted instructions in a dispatch
46129 window WINDOW_LIST. */
46131 static int
46132 count_num_restricted (rtx insn, dispatch_windows *window_list)
46134 enum dispatch_group group = get_insn_group (insn);
46135 int imm_size;
46136 int num_imm_operand;
46137 int num_imm32_operand;
46138 int num_imm64_operand;
46140 if (group == disp_no_group)
46141 return 0;
46143 if (group == disp_imm)
46145 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
46146 &num_imm64_operand);
46147 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
46148 || num_imm_operand + window_list->num_imm > MAX_IMM
46149 || (num_imm32_operand > 0
46150 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
46151 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
46152 || (num_imm64_operand > 0
46153 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
46154 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
46155 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
46156 && num_imm64_operand > 0
46157 && ((window_list->num_imm_64 > 0
46158 && window_list->num_insn >= 2)
46159 || window_list->num_insn >= 3)))
46160 return BIG;
46162 return 1;
46165 if ((group == disp_load_store
46166 && (window_list->num_loads >= MAX_LOAD
46167 || window_list->num_stores >= MAX_STORE))
46168 || ((group == disp_load
46169 || group == disp_prefetch)
46170 && window_list->num_loads >= MAX_LOAD)
46171 || (group == disp_store
46172 && window_list->num_stores >= MAX_STORE))
46173 return BIG;
46175 return 1;
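
/* Illustrative sketch, not part of this file: the per-window memory caps that
   count_num_restricted enforces, shown with plain counters.  BIG, MAX_LOAD and
   MAX_STORE are the macros used above; the helper name and boolean parameters
   are made up for the example.  */
#if 0
static int
example_mem_restriction (bool is_load, bool is_store,
                         int num_loads, int num_stores)
{
  /* A load (or prefetch) no longer fits once MAX_LOAD loads are already in
     the window; likewise for stores and MAX_STORE.  */
  if ((is_load && num_loads >= MAX_LOAD)
      || (is_store && num_stores >= MAX_STORE))
    return BIG;
  return 1;
}
#endif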
46178 /* This function returns true if insn satisfies dispatch rules on the
46179 last window scheduled. */
46181 static bool
46182 fits_dispatch_window (rtx insn)
46184 dispatch_windows *window_list = dispatch_window_list;
46185 dispatch_windows *window_list_next = dispatch_window_list->next;
46186 unsigned int num_restrict;
46187 enum dispatch_group group = get_insn_group (insn);
46188 enum insn_path path = get_insn_path (insn);
46189 int sum;
46191 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
46192 instructions should be given the lowest priority in the
46193 scheduling process in Haifa scheduler to make sure they will be
46194 scheduled in the same dispatch window as the reference to them. */
46195 if (group == disp_jcc || group == disp_cmp)
46196 return false;
46198 /* Check nonrestricted. */
46199 if (group == disp_no_group || group == disp_branch)
46200 return true;
46202 /* Get last dispatch window. */
46203 if (window_list_next)
46204 window_list = window_list_next;
46206 if (window_list->window_num == 1)
46208 sum = window_list->prev->window_size + window_list->window_size;
46210 if (sum == 32
46211 || (min_insn_size (insn) + sum) >= 48)
46212 /* Window 1 is full. Go for next window. */
46213 return true;
46216 num_restrict = count_num_restricted (insn, window_list);
46218 if (num_restrict > num_allowable_groups[group])
46219 return false;
46221 /* See if it fits in the first window. */
46222 if (window_list->window_num == 0)
46224 /* The first window should have only single- and double-path
46225 uops. */
46226 if (path == path_double
46227 && (window_list->num_uops + 2) > MAX_INSN)
46228 return false;
46229 else if (path != path_single)
46230 return false;
46232 return true;
46235 /* Add an instruction INSN with NUM_UOPS micro-operations to the
46236 dispatch window WINDOW_LIST. */
46238 static void
46239 add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
46241 int byte_len = min_insn_size (insn);
46242 int num_insn = window_list->num_insn;
46243 int imm_size;
46244 sched_insn_info *window = window_list->window;
46245 enum dispatch_group group = get_insn_group (insn);
46246 enum insn_path path = get_insn_path (insn);
46247 int num_imm_operand;
46248 int num_imm32_operand;
46249 int num_imm64_operand;
46251 if (!window_list->violation && group != disp_cmp
46252 && !fits_dispatch_window (insn))
46253 window_list->violation = true;
46255 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
46256 &num_imm64_operand);
46258 /* Initialize window with new instruction. */
46259 window[num_insn].insn = insn;
46260 window[num_insn].byte_len = byte_len;
46261 window[num_insn].group = group;
46262 window[num_insn].path = path;
46263 window[num_insn].imm_bytes = imm_size;
46265 window_list->window_size += byte_len;
46266 window_list->num_insn = num_insn + 1;
46267 window_list->num_uops = window_list->num_uops + num_uops;
46268 window_list->imm_size += imm_size;
46269 window_list->num_imm += num_imm_operand;
46270 window_list->num_imm_32 += num_imm32_operand;
46271 window_list->num_imm_64 += num_imm64_operand;
46273 if (group == disp_store)
46274 window_list->num_stores += 1;
46275 else if (group == disp_load
46276 || group == disp_prefetch)
46277 window_list->num_loads += 1;
46278 else if (group == disp_load_store)
46280 window_list->num_stores += 1;
46281 window_list->num_loads += 1;
46285 /* Adds a scheduled instruction, INSN, to the current dispatch window.
46286 If the total bytes of instructions or the number of instructions in
46287 the window exceeds the allowable limit, a new window is allocated. */
46289 static void
46290 add_to_dispatch_window (rtx insn)
46292 int byte_len;
46293 dispatch_windows *window_list;
46294 dispatch_windows *next_list;
46295 dispatch_windows *window0_list;
46296 enum insn_path path;
46297 enum dispatch_group insn_group;
46298 bool insn_fits;
46299 int num_insn;
46300 int num_uops;
46301 int window_num;
46302 int insn_num_uops;
46303 int sum;
46305 if (INSN_CODE (insn) < 0)
46306 return;
46308 byte_len = min_insn_size (insn);
46309 window_list = dispatch_window_list;
46310 next_list = window_list->next;
46311 path = get_insn_path (insn);
46312 insn_group = get_insn_group (insn);
46314 /* Get the last dispatch window. */
46315 if (next_list)
46316 window_list = dispatch_window_list->next;
46318 if (path == path_single)
46319 insn_num_uops = 1;
46320 else if (path == path_double)
46321 insn_num_uops = 2;
46322 else
46323 insn_num_uops = (int) path;
46325 /* If current window is full, get a new window.
46326 Window number zero is full if MAX_INSN uops are scheduled in it.
46327 Window number one is full if window zero's bytes plus window
46328 one's bytes equal 32, or if adding the bytes of the new instruction
46329 makes the total greater than 48, or if it already has MAX_INSN
46330 instructions in it. */
46331 num_insn = window_list->num_insn;
46332 num_uops = window_list->num_uops;
46333 window_num = window_list->window_num;
46334 insn_fits = fits_dispatch_window (insn);
46336 if (num_insn >= MAX_INSN
46337 || num_uops + insn_num_uops > MAX_INSN
46338 || !(insn_fits))
46340 window_num = ~window_num & 1;
46341 window_list = allocate_next_window (window_num);
46344 if (window_num == 0)
46346 add_insn_window (insn, window_list, insn_num_uops);
46347 if (window_list->num_insn >= MAX_INSN
46348 && insn_group == disp_branch)
46350 process_end_window ();
46351 return;
46354 else if (window_num == 1)
46356 window0_list = window_list->prev;
46357 sum = window0_list->window_size + window_list->window_size;
46358 if (sum == 32
46359 || (byte_len + sum) >= 48)
46361 process_end_window ();
46362 window_list = dispatch_window_list;
46365 add_insn_window (insn, window_list, insn_num_uops);
46367 else
46368 gcc_unreachable ();
46370 if (is_end_basic_block (insn_group))
46372 /* End of basic block is reached; do end-of-basic-block processing. */
46373 process_end_window ();
46374 return;
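
/* Illustrative sketch, not part of this file: the window toggle used in
   add_to_dispatch_window above.  "~n & 1" flips a window number between 0 and
   1, so a full window 0 hands off to window 1 and vice versa.  The function
   name is made up for the example.  */
#if 0
static int
example_toggle_window (int window_num)
{
  return ~window_num & 1;	/* 0 -> 1, 1 -> 0.  */
}
#endif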
46378 /* Print the dispatch window, WINDOW_NUM, to FILE. */
46380 DEBUG_FUNCTION static void
46381 debug_dispatch_window_file (FILE *file, int window_num)
46383 dispatch_windows *list;
46384 int i;
46386 if (window_num == 0)
46387 list = dispatch_window_list;
46388 else
46389 list = dispatch_window_list1;
46391 fprintf (file, "Window #%d:\n", list->window_num);
46392 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
46393 list->num_insn, list->num_uops, list->window_size);
46394 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
46395 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
46397 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
46398 list->num_stores);
46399 fprintf (file, " insn info:\n");
46401 for (i = 0; i < MAX_INSN; i++)
46403 if (!list->window[i].insn)
46404 break;
46405 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
46406 i, group_name[list->window[i].group],
46407 i, (void *)list->window[i].insn,
46408 i, list->window[i].path,
46409 i, list->window[i].byte_len,
46410 i, list->window[i].imm_bytes);
46414 /* Print to stdout a dispatch window. */
46416 DEBUG_FUNCTION void
46417 debug_dispatch_window (int window_num)
46419 debug_dispatch_window_file (stdout, window_num);
46422 /* Print INSN dispatch information to FILE. */
46424 DEBUG_FUNCTION static void
46425 debug_insn_dispatch_info_file (FILE *file, rtx insn)
46427 int byte_len;
46428 enum insn_path path;
46429 enum dispatch_group group;
46430 int imm_size;
46431 int num_imm_operand;
46432 int num_imm32_operand;
46433 int num_imm64_operand;
46435 if (INSN_CODE (insn) < 0)
46436 return;
46438 byte_len = min_insn_size (insn);
46439 path = get_insn_path (insn);
46440 group = get_insn_group (insn);
46441 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
46442 &num_imm64_operand);
46444 fprintf (file, " insn info:\n");
46445 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
46446 group_name[group], path, byte_len);
46447 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
46448 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
46451 /* Print to STDOUT the status of the ready list with respect to
46452 dispatch windows. */
46454 DEBUG_FUNCTION void
46455 debug_ready_dispatch (void)
46457 int i;
46458 int no_ready = number_in_ready ();
46460 fprintf (stdout, "Number of ready: %d\n", no_ready);
46462 for (i = 0; i < no_ready; i++)
46463 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
46466 /* This routine is the driver of the dispatch scheduler. */
46468 static void
46469 do_dispatch (rtx_insn *insn, int mode)
46471 if (mode == DISPATCH_INIT)
46472 init_dispatch_sched ();
46473 else if (mode == ADD_TO_DISPATCH_WINDOW)
46474 add_to_dispatch_window (insn);
46477 /* Return TRUE if Dispatch Scheduling is supported. */
46479 static bool
46480 has_dispatch (rtx_insn *insn, int action)
46482 if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3 || TARGET_BDVER4)
46483 && flag_dispatch_scheduler)
46484 switch (action)
46486 default:
46487 return false;
46489 case IS_DISPATCH_ON:
46490 return true;
46491 break;
46493 case IS_CMP:
46494 return is_cmp (insn);
46496 case DISPATCH_VIOLATION:
46497 return dispatch_violation ();
46499 case FITS_DISPATCH_WINDOW:
46500 return fits_dispatch_window (insn);
46503 return false;
46506 /* Implementation of reassociation_width target hook used by
46507 reassoc phase to identify parallelism level in reassociated
46508 tree. The statement's tree code is passed in OPC. The arguments'
46509 type is passed in MODE.
46511 Currently parallel reassociation is enabled for Atom
46512 processors only and we set reassociation width to be 2
46513 because Atom may issue up to 2 instructions per cycle.
46515 Return value should be fixed if parallel reassociation is
46516 enabled for other processors. */
46518 static int
46519 ix86_reassociation_width (unsigned int, enum machine_mode mode)
46521 int res = 1;
46523 /* Vector part. */
46524 if (VECTOR_MODE_P (mode))
46526 if (TARGET_VECTOR_PARALLEL_EXECUTION)
46527 return 2;
46528 else
46529 return 1;
46532 /* Scalar part. */
46533 if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
46534 res = 2;
46535 else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
46536 res = 2;
46538 return res;
46541 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
46542 place emms and femms instructions. */
46544 static enum machine_mode
46545 ix86_preferred_simd_mode (enum machine_mode mode)
46547 if (!TARGET_SSE)
46548 return word_mode;
46550 switch (mode)
46552 case QImode:
46553 return TARGET_AVX512BW ? V64QImode :
46554 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
46555 case HImode:
46556 return TARGET_AVX512BW ? V32HImode :
46557 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
46558 case SImode:
46559 return TARGET_AVX512F ? V16SImode :
46560 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
46561 case DImode:
46562 return TARGET_AVX512F ? V8DImode :
46563 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;
46565 case SFmode:
46566 if (TARGET_AVX512F)
46567 return V16SFmode;
46568 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
46569 return V8SFmode;
46570 else
46571 return V4SFmode;
46573 case DFmode:
46574 if (!TARGET_VECTORIZE_DOUBLE)
46575 return word_mode;
46576 else if (TARGET_AVX512F)
46577 return V8DFmode;
46578 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
46579 return V4DFmode;
46580 else if (TARGET_SSE2)
46581 return V2DFmode;
46582 /* FALLTHRU */
46584 default:
46585 return word_mode;
46589 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
46590 vectors. If AVX512F is enabled then try vectorizing with 512bit,
46591 256bit and 128bit vectors. */
46593 static unsigned int
46594 ix86_autovectorize_vector_sizes (void)
46596 return TARGET_AVX512F ? 64 | 32 | 16 :
46597 (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
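
/* Illustrative sketch, not part of this file: how the bitmask returned above
   is meant to be read.  With AVX512F the hook returns 64 | 32 | 16, i.e. the
   vectorizer may try 512-, 256- and 128-bit vectors in turn; a zero mask
   tells it to stick with the preferred SIMD mode.  The helper name is made up
   for the example.  */
#if 0
static bool
example_vector_size_allowed (unsigned int sizes_mask, unsigned int bytes)
{
  /* BYTES would be 64, 32 or 16.  */
  return (sizes_mask & bytes) != 0;
}
#endif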
46602 /* Return class of registers which could be used for pseudo of MODE
46603 and of class RCLASS for spilling instead of memory. Return NO_REGS
46604 if it is not possible or not profitable. */
46605 static reg_class_t
46606 ix86_spill_class (reg_class_t rclass, enum machine_mode mode)
46608 if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL && ! TARGET_MMX
46609 && (mode == SImode || (TARGET_64BIT && mode == DImode))
46610 && rclass != NO_REGS && INTEGER_CLASS_P (rclass))
46611 return ALL_SSE_REGS;
46612 return NO_REGS;
46615 /* Implement targetm.vectorize.init_cost. */
46617 static void *
46618 ix86_init_cost (struct loop *)
46620 unsigned *cost = XNEWVEC (unsigned, 3);
46621 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
46622 return cost;
46625 /* Implement targetm.vectorize.add_stmt_cost. */
46627 static unsigned
46628 ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
46629 struct _stmt_vec_info *stmt_info, int misalign,
46630 enum vect_cost_model_location where)
46632 unsigned *cost = (unsigned *) data;
46633 unsigned retval = 0;
46635 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
46636 int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
46638 /* Statements in an inner loop relative to the loop being
46639 vectorized are weighted more heavily. The value here is
46640 arbitrary and could potentially be improved with analysis. */
46641 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
46642 count *= 50; /* FIXME. */
46644 retval = (unsigned) (count * stmt_cost);
46646 /* We need to multiply all vector stmt costs by 1.7 (estimated cost)
46647 for Silvermont, as it has an out-of-order integer pipeline and can execute
46648 2 scalar instructions per tick, but has an in-order SIMD pipeline. */
46649 if (TARGET_SILVERMONT || TARGET_INTEL)
46650 if (stmt_info && stmt_info->stmt)
46652 tree lhs_op = gimple_get_lhs (stmt_info->stmt);
46653 if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
46654 retval = (retval * 17) / 10;
46657 cost[where] += retval;
46659 return retval;
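
/* Illustrative sketch, not part of this file: the 1.7x adjustment above done
   in integer arithmetic.  A statement cost of 10 becomes (10 * 17) / 10 = 17
   on Silvermont-class targets for integer vector statements.  The helper name
   is made up for the example.  */
#if 0
static unsigned
example_silvermont_scale (unsigned cost)
{
  return (cost * 17) / 10;	/* ~1.7x, rounded down.  */
}
#endif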
46662 /* Implement targetm.vectorize.finish_cost. */
46664 static void
46665 ix86_finish_cost (void *data, unsigned *prologue_cost,
46666 unsigned *body_cost, unsigned *epilogue_cost)
46668 unsigned *cost = (unsigned *) data;
46669 *prologue_cost = cost[vect_prologue];
46670 *body_cost = cost[vect_body];
46671 *epilogue_cost = cost[vect_epilogue];
46674 /* Implement targetm.vectorize.destroy_cost_data. */
46676 static void
46677 ix86_destroy_cost_data (void *data)
46679 free (data);
46682 /* Validate target specific memory model bits in VAL. */
46684 static unsigned HOST_WIDE_INT
46685 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
46687 unsigned HOST_WIDE_INT model = val & MEMMODEL_MASK;
46688 bool strong;
46690 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
46691 |MEMMODEL_MASK)
46692 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
46694 warning (OPT_Winvalid_memory_model,
46695 "Unknown architecture specific memory model");
46696 return MEMMODEL_SEQ_CST;
46698 strong = (model == MEMMODEL_ACQ_REL || model == MEMMODEL_SEQ_CST);
46699 if (val & IX86_HLE_ACQUIRE && !(model == MEMMODEL_ACQUIRE || strong))
46701 warning (OPT_Winvalid_memory_model,
46702 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
46703 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
46705 if (val & IX86_HLE_RELEASE && !(model == MEMMODEL_RELEASE || strong))
46707 warning (OPT_Winvalid_memory_model,
46708 "HLE_RELEASE not used with RELEASE or stronger memory model");
46709 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
46711 return val;
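
/* Illustrative sketch, not part of this file: how the HLE bits are expected
   to combine with the memory model in VAL.  MEMMODEL_ACQUIRE | IX86_HLE_ACQUIRE
   is accepted unchanged, while MEMMODEL_RELAXED | IX86_HLE_ACQUIRE triggers
   the warning above and falls back to MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE.
   The helper name is made up for the example.  */
#if 0
static bool
example_hle_acquire_ok (unsigned HOST_WIDE_INT val)
{
  unsigned HOST_WIDE_INT model = val & MEMMODEL_MASK;
  /* HLE_ACQUIRE requires an acquire-or-stronger model.  */
  return model == MEMMODEL_ACQUIRE
	 || model == MEMMODEL_ACQ_REL
	 || model == MEMMODEL_SEQ_CST;
}
#endif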
46714 /* Set CLONEI->vecsize_mangle, CLONEI->vecsize_int,
46715 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
46716 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
46717 or number of vecsize_mangle variants that should be emitted. */
46719 static int
46720 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
46721 struct cgraph_simd_clone *clonei,
46722 tree base_type, int num)
46724 int ret = 1;
46726 if (clonei->simdlen
46727 && (clonei->simdlen < 2
46728 || clonei->simdlen > 16
46729 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
46731 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
46732 "unsupported simdlen %d", clonei->simdlen);
46733 return 0;
46736 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
46737 if (TREE_CODE (ret_type) != VOID_TYPE)
46738 switch (TYPE_MODE (ret_type))
46740 case QImode:
46741 case HImode:
46742 case SImode:
46743 case DImode:
46744 case SFmode:
46745 case DFmode:
46746 /* case SCmode: */
46747 /* case DCmode: */
46748 break;
46749 default:
46750 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
46751 "unsupported return type %qT for simd\n", ret_type);
46752 return 0;
46755 tree t;
46756 int i;
46758 for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
46759 /* FIXME: Shouldn't we allow such arguments if they are uniform? */
46760 switch (TYPE_MODE (TREE_TYPE (t)))
46762 case QImode:
46763 case HImode:
46764 case SImode:
46765 case DImode:
46766 case SFmode:
46767 case DFmode:
46768 /* case SCmode: */
46769 /* case DCmode: */
46770 break;
46771 default:
46772 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
46773 "unsupported argument type %qT for simd\n", TREE_TYPE (t));
46774 return 0;
46777 if (clonei->cilk_elemental)
46779 /* Parse the processor clause here. If not present, default to 'b'. */
46780 clonei->vecsize_mangle = 'b';
46782 else if (!TREE_PUBLIC (node->decl))
46784 /* If the function isn't exported, we can pick up just one ISA
46785 for the clones. */
46786 if (TARGET_AVX2)
46787 clonei->vecsize_mangle = 'd';
46788 else if (TARGET_AVX)
46789 clonei->vecsize_mangle = 'c';
46790 else
46791 clonei->vecsize_mangle = 'b';
46792 ret = 1;
46794 else
46796 clonei->vecsize_mangle = "bcd"[num];
46797 ret = 3;
46799 switch (clonei->vecsize_mangle)
46801 case 'b':
46802 clonei->vecsize_int = 128;
46803 clonei->vecsize_float = 128;
46804 break;
46805 case 'c':
46806 clonei->vecsize_int = 128;
46807 clonei->vecsize_float = 256;
46808 break;
46809 case 'd':
46810 clonei->vecsize_int = 256;
46811 clonei->vecsize_float = 256;
46812 break;
46814 if (clonei->simdlen == 0)
46816 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
46817 clonei->simdlen = clonei->vecsize_int;
46818 else
46819 clonei->simdlen = clonei->vecsize_float;
46820 clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
46821 if (clonei->simdlen > 16)
46822 clonei->simdlen = 16;
46824 return ret;
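
/* Illustrative sketch, not part of this file: the default simdlen computation
   above, on plain integers.  For a 'c' (AVX) clone with a float base type,
   vecsize_float is 256 and the element is 32 bits, so simdlen becomes
   256 / 32 = 8; the result is capped at 16.  The helper name is made up for
   the example.  */
#if 0
static int
example_default_simdlen (int vecsize_bits, int elem_bits)
{
  int simdlen = vecsize_bits / elem_bits;
  return simdlen > 16 ? 16 : simdlen;
}
#endif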
46827 /* Add target attribute to SIMD clone NODE if needed. */
46829 static void
46830 ix86_simd_clone_adjust (struct cgraph_node *node)
46832 const char *str = NULL;
46833 gcc_assert (node->decl == cfun->decl);
46834 switch (node->simdclone->vecsize_mangle)
46836 case 'b':
46837 if (!TARGET_SSE2)
46838 str = "sse2";
46839 break;
46840 case 'c':
46841 if (!TARGET_AVX)
46842 str = "avx";
46843 break;
46844 case 'd':
46845 if (!TARGET_AVX2)
46846 str = "avx2";
46847 break;
46848 default:
46849 gcc_unreachable ();
46851 if (str == NULL)
46852 return;
46853 push_cfun (NULL);
46854 tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str));
46855 bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
46856 gcc_assert (ok);
46857 pop_cfun ();
46858 ix86_previous_fndecl = NULL_TREE;
46859 ix86_set_current_function (node->decl);
46862 /* If SIMD clone NODE can't be used in a vectorized loop
46863 in current function, return -1, otherwise return a badness of using it
46864 (0 if it is most desirable from vecsize_mangle point of view, 1
46865 slightly less desirable, etc.). */
46867 static int
46868 ix86_simd_clone_usable (struct cgraph_node *node)
46870 switch (node->simdclone->vecsize_mangle)
46872 case 'b':
46873 if (!TARGET_SSE2)
46874 return -1;
46875 if (!TARGET_AVX)
46876 return 0;
46877 return TARGET_AVX2 ? 2 : 1;
46878 case 'c':
46879 if (!TARGET_AVX)
46880 return -1;
46881 return TARGET_AVX2 ? 1 : 0;
46882 break;
46883 case 'd':
46884 if (!TARGET_AVX2)
46885 return -1;
46886 return 0;
46887 default:
46888 gcc_unreachable ();
46892 /* This function counts the number of memory references.
46893 This value determines the unrolling factor for the
46894 bdver3 and bdver4 architectures. */
46896 static int
46897 ix86_loop_memcount (rtx *x, unsigned *mem_count)
46899 if (*x != NULL_RTX && MEM_P (*x))
46901 enum machine_mode mode;
46902 unsigned int n_words;
46904 mode = GET_MODE (*x);
46905 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
46907 if (n_words > 4)
46908 (*mem_count)+=2;
46909 else
46910 (*mem_count)+=1;
46912 return 0;
46915 /* This function adjusts the unroll factor based on
46916 the hardware capabilities. For example, bdver3 has
46917 a loop buffer which makes unrolling of smaller
46918 loops less important. This function decides the
46919 unroll factor using the number of memory references
46920 (a value of 32 is used) as a heuristic. */
46922 static unsigned
46923 ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
46925 basic_block *bbs;
46926 rtx_insn *insn;
46927 unsigned i;
46928 unsigned mem_count = 0;
46930 if (!TARGET_ADJUST_UNROLL)
46931 return nunroll;
46933 /* Count the number of memory references within the loop body. */
46934 bbs = get_loop_body (loop);
46935 for (i = 0; i < loop->num_nodes; i++)
46937 for (insn = BB_HEAD (bbs[i]); insn != BB_END (bbs[i]); insn = NEXT_INSN (insn))
46938 if (NONDEBUG_INSN_P (insn))
46939 for_each_rtx_in_insn (&insn, (rtx_function) ix86_loop_memcount,
46940 &mem_count);
46942 free (bbs);
46944 if (mem_count && mem_count <= 32)
46945 return 32 / mem_count;
46947 return nunroll;
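
/* Illustrative sketch, not part of this file: the heuristic above, shown on
   plain integers.  A loop body with 8 counted memory references gets an
   unroll factor of 32 / 8 = 4; with zero or more than 32 references the
   requested factor NUNROLL is kept.  The helper name is made up for the
   example.  */
#if 0
static unsigned
example_unroll_factor (unsigned nunroll, unsigned mem_count)
{
  if (mem_count && mem_count <= 32)
    return 32 / mem_count;
  return nunroll;
}
#endif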
46951 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
46953 static bool
46954 ix86_float_exceptions_rounding_supported_p (void)
46956 /* For x87 floating point with standard excess precision handling,
46957 there is no adddf3 pattern (since x87 floating point only has
46958 XFmode operations) so the default hook implementation gets this
46959 wrong. */
46960 return TARGET_80387 || TARGET_SSE_MATH;
46963 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
46965 static void
46966 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
46968 if (!TARGET_80387 && !TARGET_SSE_MATH)
46969 return;
46970 tree exceptions_var = create_tmp_var (integer_type_node, NULL);
46971 if (TARGET_80387)
46973 tree fenv_index_type = build_index_type (size_int (6));
46974 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
46975 tree fenv_var = create_tmp_var (fenv_type, NULL);
46976 mark_addressable (fenv_var);
46977 tree fenv_ptr = build_pointer_type (fenv_type);
46978 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
46979 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
46980 tree fnstenv = ix86_builtins[IX86_BUILTIN_FNSTENV];
46981 tree fldenv = ix86_builtins[IX86_BUILTIN_FLDENV];
46982 tree fnstsw = ix86_builtins[IX86_BUILTIN_FNSTSW];
46983 tree fnclex = ix86_builtins[IX86_BUILTIN_FNCLEX];
46984 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
46985 tree hold_fnclex = build_call_expr (fnclex, 0);
46986 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_fnstenv,
46987 hold_fnclex);
46988 *clear = build_call_expr (fnclex, 0);
46989 tree sw_var = create_tmp_var (short_unsigned_type_node, NULL);
46990 tree fnstsw_call = build_call_expr (fnstsw, 0);
46991 tree sw_mod = build2 (MODIFY_EXPR, short_unsigned_type_node,
46992 sw_var, fnstsw_call);
46993 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
46994 tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
46995 exceptions_var, exceptions_x87);
46996 *update = build2 (COMPOUND_EXPR, integer_type_node,
46997 sw_mod, update_mod);
46998 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
46999 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
47001 if (TARGET_SSE_MATH)
47003 tree mxcsr_orig_var = create_tmp_var (unsigned_type_node, NULL);
47004 tree mxcsr_mod_var = create_tmp_var (unsigned_type_node, NULL);
47005 tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR];
47006 tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR];
47007 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
47008 tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node,
47009 mxcsr_orig_var, stmxcsr_hold_call);
47010 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
47011 mxcsr_orig_var,
47012 build_int_cst (unsigned_type_node, 0x1f80));
47013 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
47014 build_int_cst (unsigned_type_node, 0xffffffc0));
47015 tree hold_assign_mod = build2 (MODIFY_EXPR, unsigned_type_node,
47016 mxcsr_mod_var, hold_mod_val);
47017 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
47018 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
47019 hold_assign_orig, hold_assign_mod);
47020 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
47021 ldmxcsr_hold_call);
47022 if (*hold)
47023 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
47024 else
47025 *hold = hold_all;
47026 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
47027 if (*clear)
47028 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
47029 ldmxcsr_clear_call);
47030 else
47031 *clear = ldmxcsr_clear_call;
47032 tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
47033 tree exceptions_sse = fold_convert (integer_type_node,
47034 stxmcsr_update_call);
47035 if (*update)
47037 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
47038 exceptions_var, exceptions_sse);
47039 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
47040 exceptions_var, exceptions_mod);
47041 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
47042 exceptions_assign);
47044 else
47045 *update = build2 (MODIFY_EXPR, integer_type_node,
47046 exceptions_var, exceptions_sse);
47047 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
47048 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
47049 ldmxcsr_update_call);
47051 tree atomic_feraiseexcept
47052 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
47053 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
47054 1, exceptions_var);
47055 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
47056 atomic_feraiseexcept_call);
47059 /* Initialize the GCC target structure. */
47060 #undef TARGET_RETURN_IN_MEMORY
47061 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
47063 #undef TARGET_LEGITIMIZE_ADDRESS
47064 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
47066 #undef TARGET_ATTRIBUTE_TABLE
47067 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
47068 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
47069 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
47070 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
47071 # undef TARGET_MERGE_DECL_ATTRIBUTES
47072 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
47073 #endif
47075 #undef TARGET_COMP_TYPE_ATTRIBUTES
47076 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
47078 #undef TARGET_INIT_BUILTINS
47079 #define TARGET_INIT_BUILTINS ix86_init_builtins
47080 #undef TARGET_BUILTIN_DECL
47081 #define TARGET_BUILTIN_DECL ix86_builtin_decl
47082 #undef TARGET_EXPAND_BUILTIN
47083 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
47085 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
47086 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
47087 ix86_builtin_vectorized_function
47089 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
47090 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
47092 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
47093 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
47095 #undef TARGET_VECTORIZE_BUILTIN_GATHER
47096 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
47098 #undef TARGET_BUILTIN_RECIPROCAL
47099 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
47101 #undef TARGET_ASM_FUNCTION_EPILOGUE
47102 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
47104 #undef TARGET_ENCODE_SECTION_INFO
47105 #ifndef SUBTARGET_ENCODE_SECTION_INFO
47106 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
47107 #else
47108 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
47109 #endif
47111 #undef TARGET_ASM_OPEN_PAREN
47112 #define TARGET_ASM_OPEN_PAREN ""
47113 #undef TARGET_ASM_CLOSE_PAREN
47114 #define TARGET_ASM_CLOSE_PAREN ""
47116 #undef TARGET_ASM_BYTE_OP
47117 #define TARGET_ASM_BYTE_OP ASM_BYTE
47119 #undef TARGET_ASM_ALIGNED_HI_OP
47120 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
47121 #undef TARGET_ASM_ALIGNED_SI_OP
47122 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
47123 #ifdef ASM_QUAD
47124 #undef TARGET_ASM_ALIGNED_DI_OP
47125 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
47126 #endif
47128 #undef TARGET_PROFILE_BEFORE_PROLOGUE
47129 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
47131 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
47132 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
47134 #undef TARGET_ASM_UNALIGNED_HI_OP
47135 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
47136 #undef TARGET_ASM_UNALIGNED_SI_OP
47137 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
47138 #undef TARGET_ASM_UNALIGNED_DI_OP
47139 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
47141 #undef TARGET_PRINT_OPERAND
47142 #define TARGET_PRINT_OPERAND ix86_print_operand
47143 #undef TARGET_PRINT_OPERAND_ADDRESS
47144 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
47145 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
47146 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
47147 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
47148 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
47150 #undef TARGET_SCHED_INIT_GLOBAL
47151 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
47152 #undef TARGET_SCHED_ADJUST_COST
47153 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
47154 #undef TARGET_SCHED_ISSUE_RATE
47155 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
47156 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
47157 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
47158 ia32_multipass_dfa_lookahead
47159 #undef TARGET_SCHED_MACRO_FUSION_P
47160 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
47161 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
47162 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
47164 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
47165 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
47167 #undef TARGET_MEMMODEL_CHECK
47168 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
47170 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
47171 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
47173 #ifdef HAVE_AS_TLS
47174 #undef TARGET_HAVE_TLS
47175 #define TARGET_HAVE_TLS true
47176 #endif
47177 #undef TARGET_CANNOT_FORCE_CONST_MEM
47178 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
47179 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
47180 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
47182 #undef TARGET_DELEGITIMIZE_ADDRESS
47183 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
47185 #undef TARGET_MS_BITFIELD_LAYOUT_P
47186 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
47188 #if TARGET_MACHO
47189 #undef TARGET_BINDS_LOCAL_P
47190 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
47191 #endif
47192 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
47193 #undef TARGET_BINDS_LOCAL_P
47194 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
47195 #endif
47197 #undef TARGET_ASM_OUTPUT_MI_THUNK
47198 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
47199 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
47200 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
47202 #undef TARGET_ASM_FILE_START
47203 #define TARGET_ASM_FILE_START x86_file_start
47205 #undef TARGET_OPTION_OVERRIDE
47206 #define TARGET_OPTION_OVERRIDE ix86_option_override
47208 #undef TARGET_REGISTER_MOVE_COST
47209 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
47210 #undef TARGET_MEMORY_MOVE_COST
47211 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
47212 #undef TARGET_RTX_COSTS
47213 #define TARGET_RTX_COSTS ix86_rtx_costs
47214 #undef TARGET_ADDRESS_COST
47215 #define TARGET_ADDRESS_COST ix86_address_cost
47217 #undef TARGET_FIXED_CONDITION_CODE_REGS
47218 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
47219 #undef TARGET_CC_MODES_COMPATIBLE
47220 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
47222 #undef TARGET_MACHINE_DEPENDENT_REORG
47223 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
47225 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
47226 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
47228 #undef TARGET_BUILD_BUILTIN_VA_LIST
47229 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
47231 #undef TARGET_FOLD_BUILTIN
47232 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
47234 #undef TARGET_COMPARE_VERSION_PRIORITY
47235 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
47237 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
47238 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
47239 ix86_generate_version_dispatcher_body
47241 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
47242 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
47243 ix86_get_function_versions_dispatcher
47245 #undef TARGET_ENUM_VA_LIST_P
47246 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
47248 #undef TARGET_FN_ABI_VA_LIST
47249 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
47251 #undef TARGET_CANONICAL_VA_LIST_TYPE
47252 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
47254 #undef TARGET_EXPAND_BUILTIN_VA_START
47255 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
47257 #undef TARGET_MD_ASM_CLOBBERS
47258 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
47260 #undef TARGET_PROMOTE_PROTOTYPES
47261 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
47262 #undef TARGET_SETUP_INCOMING_VARARGS
47263 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
47264 #undef TARGET_MUST_PASS_IN_STACK
47265 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
47266 #undef TARGET_FUNCTION_ARG_ADVANCE
47267 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
47268 #undef TARGET_FUNCTION_ARG
47269 #define TARGET_FUNCTION_ARG ix86_function_arg
47270 #undef TARGET_FUNCTION_ARG_BOUNDARY
47271 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
47272 #undef TARGET_PASS_BY_REFERENCE
47273 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
47274 #undef TARGET_INTERNAL_ARG_POINTER
47275 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
47276 #undef TARGET_UPDATE_STACK_BOUNDARY
47277 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
47278 #undef TARGET_GET_DRAP_RTX
47279 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
47280 #undef TARGET_STRICT_ARGUMENT_NAMING
47281 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
47282 #undef TARGET_STATIC_CHAIN
47283 #define TARGET_STATIC_CHAIN ix86_static_chain
47284 #undef TARGET_TRAMPOLINE_INIT
47285 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
47286 #undef TARGET_RETURN_POPS_ARGS
47287 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
47289 #undef TARGET_LEGITIMATE_COMBINED_INSN
47290 #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
47292 #undef TARGET_ASAN_SHADOW_OFFSET
47293 #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
47295 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
47296 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
47298 #undef TARGET_SCALAR_MODE_SUPPORTED_P
47299 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
47301 #undef TARGET_VECTOR_MODE_SUPPORTED_P
47302 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
47304 #undef TARGET_C_MODE_FOR_SUFFIX
47305 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
47307 #ifdef HAVE_AS_TLS
47308 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
47309 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
47310 #endif
47312 #ifdef SUBTARGET_INSERT_ATTRIBUTES
47313 #undef TARGET_INSERT_ATTRIBUTES
47314 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
47315 #endif
47317 #undef TARGET_MANGLE_TYPE
47318 #define TARGET_MANGLE_TYPE ix86_mangle_type
47320 #if !TARGET_MACHO
47321 #undef TARGET_STACK_PROTECT_FAIL
47322 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
47323 #endif
47325 #undef TARGET_FUNCTION_VALUE
47326 #define TARGET_FUNCTION_VALUE ix86_function_value
47328 #undef TARGET_FUNCTION_VALUE_REGNO_P
47329 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
47331 #undef TARGET_PROMOTE_FUNCTION_MODE
47332 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
47334 #undef TARGET_MEMBER_TYPE_FORCES_BLK
47335 #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
47337 #undef TARGET_INSTANTIATE_DECLS
47338 #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
47340 #undef TARGET_SECONDARY_RELOAD
47341 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
47343 #undef TARGET_CLASS_MAX_NREGS
47344 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
47346 #undef TARGET_PREFERRED_RELOAD_CLASS
47347 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
47348 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
47349 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
47350 #undef TARGET_CLASS_LIKELY_SPILLED_P
47351 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
47353 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
47354 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
47355 ix86_builtin_vectorization_cost
47356 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
47357 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
47358 ix86_vectorize_vec_perm_const_ok
47359 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
47360 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
47361 ix86_preferred_simd_mode
47362 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
47363 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
47364 ix86_autovectorize_vector_sizes
47365 #undef TARGET_VECTORIZE_INIT_COST
47366 #define TARGET_VECTORIZE_INIT_COST ix86_init_cost
47367 #undef TARGET_VECTORIZE_ADD_STMT_COST
47368 #define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
47369 #undef TARGET_VECTORIZE_FINISH_COST
47370 #define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
47371 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
47372 #define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
47374 #undef TARGET_SET_CURRENT_FUNCTION
47375 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
47377 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
47378 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
47380 #undef TARGET_OPTION_SAVE
47381 #define TARGET_OPTION_SAVE ix86_function_specific_save
47383 #undef TARGET_OPTION_RESTORE
47384 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
47386 #undef TARGET_OPTION_PRINT
47387 #define TARGET_OPTION_PRINT ix86_function_specific_print
47389 #undef TARGET_OPTION_FUNCTION_VERSIONS
47390 #define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions
47392 #undef TARGET_CAN_INLINE_P
47393 #define TARGET_CAN_INLINE_P ix86_can_inline_p
47395 #undef TARGET_EXPAND_TO_RTL_HOOK
47396 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
47398 #undef TARGET_LEGITIMATE_ADDRESS_P
47399 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
47401 #undef TARGET_LRA_P
47402 #define TARGET_LRA_P hook_bool_void_true
47404 #undef TARGET_REGISTER_PRIORITY
47405 #define TARGET_REGISTER_PRIORITY ix86_register_priority
47407 #undef TARGET_REGISTER_USAGE_LEVELING_P
47408 #define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
47410 #undef TARGET_LEGITIMATE_CONSTANT_P
47411 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
47413 #undef TARGET_FRAME_POINTER_REQUIRED
47414 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
47416 #undef TARGET_CAN_ELIMINATE
47417 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
47419 #undef TARGET_EXTRA_LIVE_ON_ENTRY
47420 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
47422 #undef TARGET_ASM_CODE_END
47423 #define TARGET_ASM_CODE_END ix86_code_end
47425 #undef TARGET_CONDITIONAL_REGISTER_USAGE
47426 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
47428 #if TARGET_MACHO
47429 #undef TARGET_INIT_LIBFUNCS
47430 #define TARGET_INIT_LIBFUNCS darwin_rename_builtins
47431 #endif
47433 #undef TARGET_LOOP_UNROLL_ADJUST
47434 #define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
47436 #undef TARGET_SPILL_CLASS
47437 #define TARGET_SPILL_CLASS ix86_spill_class
47439 #undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
47440 #define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
47441 ix86_simd_clone_compute_vecsize_and_simdlen
47443 #undef TARGET_SIMD_CLONE_ADJUST
47444 #define TARGET_SIMD_CLONE_ADJUST \
47445 ix86_simd_clone_adjust
47447 #undef TARGET_SIMD_CLONE_USABLE
47448 #define TARGET_SIMD_CLONE_USABLE \
47449 ix86_simd_clone_usable
47451 #undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
47452 #define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
47453 ix86_float_exceptions_rounding_supported_p
47455 #undef TARGET_MODE_EMIT
47456 #define TARGET_MODE_EMIT ix86_emit_mode_set
47458 #undef TARGET_MODE_NEEDED
47459 #define TARGET_MODE_NEEDED ix86_mode_needed
47461 #undef TARGET_MODE_AFTER
47462 #define TARGET_MODE_AFTER ix86_mode_after
47464 #undef TARGET_MODE_ENTRY
47465 #define TARGET_MODE_ENTRY ix86_mode_entry
47467 #undef TARGET_MODE_EXIT
47468 #define TARGET_MODE_EXIT ix86_mode_exit
47470 #undef TARGET_MODE_PRIORITY
47471 #define TARGET_MODE_PRIORITY ix86_mode_priority
47473 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
47474 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
47476 struct gcc_target targetm = TARGET_INITIALIZER;
47478 #include "gt-i386.h"