Merge with main trunk.
[official-gcc.git] / gcc / config / i386 / i386.c
blob c520f26c89146c81e1e2ad0d05b798b9682e21ac
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2014 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tm.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "stringpool.h"
27 #include "attribs.h"
28 #include "calls.h"
29 #include "stor-layout.h"
30 #include "varasm.h"
31 #include "tm_p.h"
32 #include "regs.h"
33 #include "hard-reg-set.h"
34 #include "insn-config.h"
35 #include "conditions.h"
36 #include "output.h"
37 #include "insn-codes.h"
38 #include "insn-attr.h"
39 #include "flags.h"
40 #include "except.h"
41 #include "function.h"
42 #include "recog.h"
43 #include "expr.h"
44 #include "optabs.h"
45 #include "diagnostic-core.h"
46 #include "toplev.h"
47 #include "basic-block.h"
48 #include "ggc.h"
49 #include "target.h"
50 #include "target-def.h"
51 #include "common/common-target.h"
52 #include "langhooks.h"
53 #include "reload.h"
54 #include "cgraph.h"
55 #include "pointer-set.h"
56 #include "hash-table.h"
57 #include "vec.h"
58 #include "basic-block.h"
59 #include "tree-ssa-alias.h"
60 #include "internal-fn.h"
61 #include "gimple-fold.h"
62 #include "tree-eh.h"
63 #include "gimple-expr.h"
64 #include "is-a.h"
65 #include "gimple.h"
66 #include "gimplify.h"
67 #include "cfgloop.h"
68 #include "dwarf2.h"
69 #include "df.h"
70 #include "tm-constrs.h"
71 #include "params.h"
72 #include "cselib.h"
73 #include "debug.h"
74 #include "sched-int.h"
75 #include "sbitmap.h"
76 #include "fibheap.h"
77 #include "opts.h"
78 #include "diagnostic.h"
79 #include "dumpfile.h"
80 #include "tree-pass.h"
81 #include "context.h"
82 #include "pass_manager.h"
83 #include "target-globals.h"
84 #include "tree-iterator.h"
85 #include "tree-chkp.h"
86 #include "rtl-chkp.h"
88 static rtx legitimize_dllimport_symbol (rtx, bool);
89 static rtx legitimize_pe_coff_extern_decl (rtx, bool);
90 static rtx legitimize_pe_coff_symbol (rtx, bool);
92 #ifndef CHECK_STACK_LIMIT
93 #define CHECK_STACK_LIMIT (-1)
94 #endif
96 /* Return index of given mode in mult and division cost tables. */
97 #define MODE_INDEX(mode) \
98 ((mode) == QImode ? 0 \
99 : (mode) == HImode ? 1 \
100 : (mode) == SImode ? 2 \
101 : (mode) == DImode ? 3 \
102 : 4)
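/* Editorial note, not part of the original source: MODE_INDEX is the
   subscript used for the per-mode arrays in struct processor_costs, so a
   SImode multiply start-up cost is looked up along the lines of
   cost->mult_init[MODE_INDEX (SImode)], i.e. index 2, and anything wider
   than DImode falls into the "other" slot at index 4.  (The field name
   mult_init is assumed here purely for illustration.) */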
104 /* Processor costs (relative to an add) */
105 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
106 #define COSTS_N_BYTES(N) ((N) * 2)
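/* Editorial worked example, not part of the original source: COSTS_N_INSNS (N)
   is assumed above to expand to (N) * 4, so a one-instruction add costs 4
   units in the speed tables.  With an add taken to be 2 bytes,
   COSTS_N_BYTES (2) == 4 as well, which keeps the byte counts in the size
   table below on the same scale as the instruction counts used elsewhere. */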
108 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
110 static stringop_algs ix86_size_memcpy[2] = {
111 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
112 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
113 static stringop_algs ix86_size_memset[2] = {
114 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
115 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
117 const
118 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
119 COSTS_N_BYTES (2), /* cost of an add instruction */
120 COSTS_N_BYTES (3), /* cost of a lea instruction */
121 COSTS_N_BYTES (2), /* variable shift costs */
122 COSTS_N_BYTES (3), /* constant shift costs */
123 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
124 COSTS_N_BYTES (3), /* HI */
125 COSTS_N_BYTES (3), /* SI */
126 COSTS_N_BYTES (3), /* DI */
127 COSTS_N_BYTES (5)}, /* other */
128 0, /* cost of multiply per each bit set */
129 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
130 COSTS_N_BYTES (3), /* HI */
131 COSTS_N_BYTES (3), /* SI */
132 COSTS_N_BYTES (3), /* DI */
133 COSTS_N_BYTES (5)}, /* other */
134 COSTS_N_BYTES (3), /* cost of movsx */
135 COSTS_N_BYTES (3), /* cost of movzx */
136 0, /* "large" insn */
137 2, /* MOVE_RATIO */
138 2, /* cost for loading QImode using movzbl */
139 {2, 2, 2}, /* cost of loading integer registers
140 in QImode, HImode and SImode.
141 Relative to reg-reg move (2). */
142 {2, 2, 2}, /* cost of storing integer registers */
143 2, /* cost of reg,reg fld/fst */
144 {2, 2, 2}, /* cost of loading fp registers
145 in SFmode, DFmode and XFmode */
146 {2, 2, 2}, /* cost of storing fp registers
147 in SFmode, DFmode and XFmode */
148 3, /* cost of moving MMX register */
149 {3, 3}, /* cost of loading MMX registers
150 in SImode and DImode */
151 {3, 3}, /* cost of storing MMX registers
152 in SImode and DImode */
153 3, /* cost of moving SSE register */
154 {3, 3, 3}, /* cost of loading SSE registers
155 in SImode, DImode and TImode */
156 {3, 3, 3}, /* cost of storing SSE registers
157 in SImode, DImode and TImode */
158 3, /* MMX or SSE register to integer */
159 0, /* size of l1 cache */
160 0, /* size of l2 cache */
161 0, /* size of prefetch block */
162 0, /* number of parallel prefetches */
163 2, /* Branch cost */
164 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
165 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
166 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
167 COSTS_N_BYTES (2), /* cost of FABS instruction. */
168 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
169 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
170 ix86_size_memcpy,
171 ix86_size_memset,
172 1, /* scalar_stmt_cost. */
173 1, /* scalar load_cost. */
174 1, /* scalar_store_cost. */
175 1, /* vec_stmt_cost. */
176 1, /* vec_to_scalar_cost. */
177 1, /* scalar_to_vec_cost. */
178 1, /* vec_align_load_cost. */
179 1, /* vec_unalign_load_cost. */
180 1, /* vec_store_cost. */
181 1, /* cond_taken_branch_cost. */
182 1, /* cond_not_taken_branch_cost. */
183 };
185 /* Processor costs (relative to an add) */
186 static stringop_algs i386_memcpy[2] = {
187 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
188 DUMMY_STRINGOP_ALGS};
189 static stringop_algs i386_memset[2] = {
190 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
191 DUMMY_STRINGOP_ALGS};
193 static const
194 struct processor_costs i386_cost = { /* 386 specific costs */
195 COSTS_N_INSNS (1), /* cost of an add instruction */
196 COSTS_N_INSNS (1), /* cost of a lea instruction */
197 COSTS_N_INSNS (3), /* variable shift costs */
198 COSTS_N_INSNS (2), /* constant shift costs */
199 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
200 COSTS_N_INSNS (6), /* HI */
201 COSTS_N_INSNS (6), /* SI */
202 COSTS_N_INSNS (6), /* DI */
203 COSTS_N_INSNS (6)}, /* other */
204 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
205 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
206 COSTS_N_INSNS (23), /* HI */
207 COSTS_N_INSNS (23), /* SI */
208 COSTS_N_INSNS (23), /* DI */
209 COSTS_N_INSNS (23)}, /* other */
210 COSTS_N_INSNS (3), /* cost of movsx */
211 COSTS_N_INSNS (2), /* cost of movzx */
212 15, /* "large" insn */
213 3, /* MOVE_RATIO */
214 4, /* cost for loading QImode using movzbl */
215 {2, 4, 2}, /* cost of loading integer registers
216 in QImode, HImode and SImode.
217 Relative to reg-reg move (2). */
218 {2, 4, 2}, /* cost of storing integer registers */
219 2, /* cost of reg,reg fld/fst */
220 {8, 8, 8}, /* cost of loading fp registers
221 in SFmode, DFmode and XFmode */
222 {8, 8, 8}, /* cost of storing fp registers
223 in SFmode, DFmode and XFmode */
224 2, /* cost of moving MMX register */
225 {4, 8}, /* cost of loading MMX registers
226 in SImode and DImode */
227 {4, 8}, /* cost of storing MMX registers
228 in SImode and DImode */
229 2, /* cost of moving SSE register */
230 {4, 8, 16}, /* cost of loading SSE registers
231 in SImode, DImode and TImode */
232 {4, 8, 16}, /* cost of storing SSE registers
233 in SImode, DImode and TImode */
234 3, /* MMX or SSE register to integer */
235 0, /* size of l1 cache */
236 0, /* size of l2 cache */
237 0, /* size of prefetch block */
238 0, /* number of parallel prefetches */
239 1, /* Branch cost */
240 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
241 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
242 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
243 COSTS_N_INSNS (22), /* cost of FABS instruction. */
244 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
245 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
246 i386_memcpy,
247 i386_memset,
248 1, /* scalar_stmt_cost. */
249 1, /* scalar load_cost. */
250 1, /* scalar_store_cost. */
251 1, /* vec_stmt_cost. */
252 1, /* vec_to_scalar_cost. */
253 1, /* scalar_to_vec_cost. */
254 1, /* vec_align_load_cost. */
255 2, /* vec_unalign_load_cost. */
256 1, /* vec_store_cost. */
257 3, /* cond_taken_branch_cost. */
258 1, /* cond_not_taken_branch_cost. */
259 };
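/* Editorial note, not part of the original source: the load/store triples in
   these tables are scaled so that a register-to-register move costs 2, as the
   embedded comments say.  In i386_cost above, for example, the integer load
   entry {2, 4, 2} makes an HImode load from memory twice as expensive as a
   reg-reg move, while QImode and SImode loads cost the same as one. */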
261 static stringop_algs i486_memcpy[2] = {
262 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
263 DUMMY_STRINGOP_ALGS};
264 static stringop_algs i486_memset[2] = {
265 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
266 DUMMY_STRINGOP_ALGS};
268 static const
269 struct processor_costs i486_cost = { /* 486 specific costs */
270 COSTS_N_INSNS (1), /* cost of an add instruction */
271 COSTS_N_INSNS (1), /* cost of a lea instruction */
272 COSTS_N_INSNS (3), /* variable shift costs */
273 COSTS_N_INSNS (2), /* constant shift costs */
274 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
275 COSTS_N_INSNS (12), /* HI */
276 COSTS_N_INSNS (12), /* SI */
277 COSTS_N_INSNS (12), /* DI */
278 COSTS_N_INSNS (12)}, /* other */
279 1, /* cost of multiply per each bit set */
280 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
281 COSTS_N_INSNS (40), /* HI */
282 COSTS_N_INSNS (40), /* SI */
283 COSTS_N_INSNS (40), /* DI */
284 COSTS_N_INSNS (40)}, /* other */
285 COSTS_N_INSNS (3), /* cost of movsx */
286 COSTS_N_INSNS (2), /* cost of movzx */
287 15, /* "large" insn */
288 3, /* MOVE_RATIO */
289 4, /* cost for loading QImode using movzbl */
290 {2, 4, 2}, /* cost of loading integer registers
291 in QImode, HImode and SImode.
292 Relative to reg-reg move (2). */
293 {2, 4, 2}, /* cost of storing integer registers */
294 2, /* cost of reg,reg fld/fst */
295 {8, 8, 8}, /* cost of loading fp registers
296 in SFmode, DFmode and XFmode */
297 {8, 8, 8}, /* cost of storing fp registers
298 in SFmode, DFmode and XFmode */
299 2, /* cost of moving MMX register */
300 {4, 8}, /* cost of loading MMX registers
301 in SImode and DImode */
302 {4, 8}, /* cost of storing MMX registers
303 in SImode and DImode */
304 2, /* cost of moving SSE register */
305 {4, 8, 16}, /* cost of loading SSE registers
306 in SImode, DImode and TImode */
307 {4, 8, 16}, /* cost of storing SSE registers
308 in SImode, DImode and TImode */
309 3, /* MMX or SSE register to integer */
310 4, /* size of l1 cache. 486 has 8kB cache
311 shared for code and data, so 4kB is
312 not really precise. */
313 4, /* size of l2 cache */
314 0, /* size of prefetch block */
315 0, /* number of parallel prefetches */
316 1, /* Branch cost */
317 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
318 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
319 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
320 COSTS_N_INSNS (3), /* cost of FABS instruction. */
321 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
322 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
323 i486_memcpy,
324 i486_memset,
325 1, /* scalar_stmt_cost. */
326 1, /* scalar load_cost. */
327 1, /* scalar_store_cost. */
328 1, /* vec_stmt_cost. */
329 1, /* vec_to_scalar_cost. */
330 1, /* scalar_to_vec_cost. */
331 1, /* vec_align_load_cost. */
332 2, /* vec_unalign_load_cost. */
333 1, /* vec_store_cost. */
334 3, /* cond_taken_branch_cost. */
335 1, /* cond_not_taken_branch_cost. */
336 };
338 static stringop_algs pentium_memcpy[2] = {
339 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
340 DUMMY_STRINGOP_ALGS};
341 static stringop_algs pentium_memset[2] = {
342 {libcall, {{-1, rep_prefix_4_byte, false}}},
343 DUMMY_STRINGOP_ALGS};
345 static const
346 struct processor_costs pentium_cost = {
347 COSTS_N_INSNS (1), /* cost of an add instruction */
348 COSTS_N_INSNS (1), /* cost of a lea instruction */
349 COSTS_N_INSNS (4), /* variable shift costs */
350 COSTS_N_INSNS (1), /* constant shift costs */
351 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
352 COSTS_N_INSNS (11), /* HI */
353 COSTS_N_INSNS (11), /* SI */
354 COSTS_N_INSNS (11), /* DI */
355 COSTS_N_INSNS (11)}, /* other */
356 0, /* cost of multiply per each bit set */
357 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
358 COSTS_N_INSNS (25), /* HI */
359 COSTS_N_INSNS (25), /* SI */
360 COSTS_N_INSNS (25), /* DI */
361 COSTS_N_INSNS (25)}, /* other */
362 COSTS_N_INSNS (3), /* cost of movsx */
363 COSTS_N_INSNS (2), /* cost of movzx */
364 8, /* "large" insn */
365 6, /* MOVE_RATIO */
366 6, /* cost for loading QImode using movzbl */
367 {2, 4, 2}, /* cost of loading integer registers
368 in QImode, HImode and SImode.
369 Relative to reg-reg move (2). */
370 {2, 4, 2}, /* cost of storing integer registers */
371 2, /* cost of reg,reg fld/fst */
372 {2, 2, 6}, /* cost of loading fp registers
373 in SFmode, DFmode and XFmode */
374 {4, 4, 6}, /* cost of storing fp registers
375 in SFmode, DFmode and XFmode */
376 8, /* cost of moving MMX register */
377 {8, 8}, /* cost of loading MMX registers
378 in SImode and DImode */
379 {8, 8}, /* cost of storing MMX registers
380 in SImode and DImode */
381 2, /* cost of moving SSE register */
382 {4, 8, 16}, /* cost of loading SSE registers
383 in SImode, DImode and TImode */
384 {4, 8, 16}, /* cost of storing SSE registers
385 in SImode, DImode and TImode */
386 3, /* MMX or SSE register to integer */
387 8, /* size of l1 cache. */
388 8, /* size of l2 cache */
389 0, /* size of prefetch block */
390 0, /* number of parallel prefetches */
391 2, /* Branch cost */
392 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
393 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
394 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
395 COSTS_N_INSNS (1), /* cost of FABS instruction. */
396 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
397 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
398 pentium_memcpy,
399 pentium_memset,
400 1, /* scalar_stmt_cost. */
401 1, /* scalar load_cost. */
402 1, /* scalar_store_cost. */
403 1, /* vec_stmt_cost. */
404 1, /* vec_to_scalar_cost. */
405 1, /* scalar_to_vec_cost. */
406 1, /* vec_align_load_cost. */
407 2, /* vec_unalign_load_cost. */
408 1, /* vec_store_cost. */
409 3, /* cond_taken_branch_cost. */
410 1, /* cond_not_taken_branch_cost. */
411 };
413 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
414 (we ensure the alignment). For small blocks an inline loop is still a
415 noticeable win; for bigger blocks either rep movsl or rep movsb is the
416 way to go. Rep movsb apparently has a more expensive startup time in the
417 CPU, but after 4K the difference is down in the noise. */
418 static stringop_algs pentiumpro_memcpy[2] = {
419 {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
420 {8192, rep_prefix_4_byte, false},
421 {-1, rep_prefix_1_byte, false}}},
422 DUMMY_STRINGOP_ALGS};
423 static stringop_algs pentiumpro_memset[2] = {
424 {rep_prefix_4_byte, {{1024, unrolled_loop, false},
425 {8192, rep_prefix_4_byte, false},
426 {-1, libcall, false}}},
427 DUMMY_STRINGOP_ALGS};
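/* Editorial note, not part of the original source: as far as the stringop_algs
   layout goes, the first member names the algorithm used when the block size
   is not known at compile time; each following {max, alg, noalign} entry then
   covers known sizes up to "max" bytes, with -1 meaning no upper bound.  The
   two-element arrays are indexed by 32-bit vs. 64-bit code, and
   DUMMY_STRINGOP_ALGS simply fills the 64-bit slot where it is not tuned
   separately. */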
428 static const
429 struct processor_costs pentiumpro_cost = {
430 COSTS_N_INSNS (1), /* cost of an add instruction */
431 COSTS_N_INSNS (1), /* cost of a lea instruction */
432 COSTS_N_INSNS (1), /* variable shift costs */
433 COSTS_N_INSNS (1), /* constant shift costs */
434 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
435 COSTS_N_INSNS (4), /* HI */
436 COSTS_N_INSNS (4), /* SI */
437 COSTS_N_INSNS (4), /* DI */
438 COSTS_N_INSNS (4)}, /* other */
439 0, /* cost of multiply per each bit set */
440 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
441 COSTS_N_INSNS (17), /* HI */
442 COSTS_N_INSNS (17), /* SI */
443 COSTS_N_INSNS (17), /* DI */
444 COSTS_N_INSNS (17)}, /* other */
445 COSTS_N_INSNS (1), /* cost of movsx */
446 COSTS_N_INSNS (1), /* cost of movzx */
447 8, /* "large" insn */
448 6, /* MOVE_RATIO */
449 2, /* cost for loading QImode using movzbl */
450 {4, 4, 4}, /* cost of loading integer registers
451 in QImode, HImode and SImode.
452 Relative to reg-reg move (2). */
453 {2, 2, 2}, /* cost of storing integer registers */
454 2, /* cost of reg,reg fld/fst */
455 {2, 2, 6}, /* cost of loading fp registers
456 in SFmode, DFmode and XFmode */
457 {4, 4, 6}, /* cost of storing fp registers
458 in SFmode, DFmode and XFmode */
459 2, /* cost of moving MMX register */
460 {2, 2}, /* cost of loading MMX registers
461 in SImode and DImode */
462 {2, 2}, /* cost of storing MMX registers
463 in SImode and DImode */
464 2, /* cost of moving SSE register */
465 {2, 2, 8}, /* cost of loading SSE registers
466 in SImode, DImode and TImode */
467 {2, 2, 8}, /* cost of storing SSE registers
468 in SImode, DImode and TImode */
469 3, /* MMX or SSE register to integer */
470 8, /* size of l1 cache. */
471 256, /* size of l2 cache */
472 32, /* size of prefetch block */
473 6, /* number of parallel prefetches */
474 2, /* Branch cost */
475 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
476 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
477 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
478 COSTS_N_INSNS (2), /* cost of FABS instruction. */
479 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
480 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
481 pentiumpro_memcpy,
482 pentiumpro_memset,
483 1, /* scalar_stmt_cost. */
484 1, /* scalar load_cost. */
485 1, /* scalar_store_cost. */
486 1, /* vec_stmt_cost. */
487 1, /* vec_to_scalar_cost. */
488 1, /* scalar_to_vec_cost. */
489 1, /* vec_align_load_cost. */
490 2, /* vec_unalign_load_cost. */
491 1, /* vec_store_cost. */
492 3, /* cond_taken_branch_cost. */
493 1, /* cond_not_taken_branch_cost. */
494 };
496 static stringop_algs geode_memcpy[2] = {
497 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
498 DUMMY_STRINGOP_ALGS};
499 static stringop_algs geode_memset[2] = {
500 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
501 DUMMY_STRINGOP_ALGS};
502 static const
503 struct processor_costs geode_cost = {
504 COSTS_N_INSNS (1), /* cost of an add instruction */
505 COSTS_N_INSNS (1), /* cost of a lea instruction */
506 COSTS_N_INSNS (2), /* variable shift costs */
507 COSTS_N_INSNS (1), /* constant shift costs */
508 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
509 COSTS_N_INSNS (4), /* HI */
510 COSTS_N_INSNS (7), /* SI */
511 COSTS_N_INSNS (7), /* DI */
512 COSTS_N_INSNS (7)}, /* other */
513 0, /* cost of multiply per each bit set */
514 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
515 COSTS_N_INSNS (23), /* HI */
516 COSTS_N_INSNS (39), /* SI */
517 COSTS_N_INSNS (39), /* DI */
518 COSTS_N_INSNS (39)}, /* other */
519 COSTS_N_INSNS (1), /* cost of movsx */
520 COSTS_N_INSNS (1), /* cost of movzx */
521 8, /* "large" insn */
522 4, /* MOVE_RATIO */
523 1, /* cost for loading QImode using movzbl */
524 {1, 1, 1}, /* cost of loading integer registers
525 in QImode, HImode and SImode.
526 Relative to reg-reg move (2). */
527 {1, 1, 1}, /* cost of storing integer registers */
528 1, /* cost of reg,reg fld/fst */
529 {1, 1, 1}, /* cost of loading fp registers
530 in SFmode, DFmode and XFmode */
531 {4, 6, 6}, /* cost of storing fp registers
532 in SFmode, DFmode and XFmode */
534 1, /* cost of moving MMX register */
535 {1, 1}, /* cost of loading MMX registers
536 in SImode and DImode */
537 {1, 1}, /* cost of storing MMX registers
538 in SImode and DImode */
539 1, /* cost of moving SSE register */
540 {1, 1, 1}, /* cost of loading SSE registers
541 in SImode, DImode and TImode */
542 {1, 1, 1}, /* cost of storing SSE registers
543 in SImode, DImode and TImode */
544 1, /* MMX or SSE register to integer */
545 64, /* size of l1 cache. */
546 128, /* size of l2 cache. */
547 32, /* size of prefetch block */
548 1, /* number of parallel prefetches */
549 1, /* Branch cost */
550 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
551 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
552 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
553 COSTS_N_INSNS (1), /* cost of FABS instruction. */
554 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
555 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
556 geode_memcpy,
557 geode_memset,
558 1, /* scalar_stmt_cost. */
559 1, /* scalar load_cost. */
560 1, /* scalar_store_cost. */
561 1, /* vec_stmt_cost. */
562 1, /* vec_to_scalar_cost. */
563 1, /* scalar_to_vec_cost. */
564 1, /* vec_align_load_cost. */
565 2, /* vec_unalign_load_cost. */
566 1, /* vec_store_cost. */
567 3, /* cond_taken_branch_cost. */
568 1, /* cond_not_taken_branch_cost. */
569 };
571 static stringop_algs k6_memcpy[2] = {
572 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
573 DUMMY_STRINGOP_ALGS};
574 static stringop_algs k6_memset[2] = {
575 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
576 DUMMY_STRINGOP_ALGS};
577 static const
578 struct processor_costs k6_cost = {
579 COSTS_N_INSNS (1), /* cost of an add instruction */
580 COSTS_N_INSNS (2), /* cost of a lea instruction */
581 COSTS_N_INSNS (1), /* variable shift costs */
582 COSTS_N_INSNS (1), /* constant shift costs */
583 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
584 COSTS_N_INSNS (3), /* HI */
585 COSTS_N_INSNS (3), /* SI */
586 COSTS_N_INSNS (3), /* DI */
587 COSTS_N_INSNS (3)}, /* other */
588 0, /* cost of multiply per each bit set */
589 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
590 COSTS_N_INSNS (18), /* HI */
591 COSTS_N_INSNS (18), /* SI */
592 COSTS_N_INSNS (18), /* DI */
593 COSTS_N_INSNS (18)}, /* other */
594 COSTS_N_INSNS (2), /* cost of movsx */
595 COSTS_N_INSNS (2), /* cost of movzx */
596 8, /* "large" insn */
597 4, /* MOVE_RATIO */
598 3, /* cost for loading QImode using movzbl */
599 {4, 5, 4}, /* cost of loading integer registers
600 in QImode, HImode and SImode.
601 Relative to reg-reg move (2). */
602 {2, 3, 2}, /* cost of storing integer registers */
603 4, /* cost of reg,reg fld/fst */
604 {6, 6, 6}, /* cost of loading fp registers
605 in SFmode, DFmode and XFmode */
606 {4, 4, 4}, /* cost of storing fp registers
607 in SFmode, DFmode and XFmode */
608 2, /* cost of moving MMX register */
609 {2, 2}, /* cost of loading MMX registers
610 in SImode and DImode */
611 {2, 2}, /* cost of storing MMX registers
612 in SImode and DImode */
613 2, /* cost of moving SSE register */
614 {2, 2, 8}, /* cost of loading SSE registers
615 in SImode, DImode and TImode */
616 {2, 2, 8}, /* cost of storing SSE registers
617 in SImode, DImode and TImode */
618 6, /* MMX or SSE register to integer */
619 32, /* size of l1 cache. */
620 32, /* size of l2 cache. Some models
621 have integrated l2 cache, but
622 optimizing for k6 is not important
623 enough to worry about that. */
624 32, /* size of prefetch block */
625 1, /* number of parallel prefetches */
626 1, /* Branch cost */
627 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
628 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
629 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
630 COSTS_N_INSNS (2), /* cost of FABS instruction. */
631 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
632 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
633 k6_memcpy,
634 k6_memset,
635 1, /* scalar_stmt_cost. */
636 1, /* scalar load_cost. */
637 1, /* scalar_store_cost. */
638 1, /* vec_stmt_cost. */
639 1, /* vec_to_scalar_cost. */
640 1, /* scalar_to_vec_cost. */
641 1, /* vec_align_load_cost. */
642 2, /* vec_unalign_load_cost. */
643 1, /* vec_store_cost. */
644 3, /* cond_taken_branch_cost. */
645 1, /* cond_not_taken_branch_cost. */
646 };
648 /* For some reason, Athlon deals better with the REP prefix (relative to
649 loops) than K8 does. Alignment becomes important after 8 bytes for memcpy
650 and 128 bytes for memset. */
651 static stringop_algs athlon_memcpy[2] = {
652 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
653 DUMMY_STRINGOP_ALGS};
654 static stringop_algs athlon_memset[2] = {
655 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
656 DUMMY_STRINGOP_ALGS};
657 static const
658 struct processor_costs athlon_cost = {
659 COSTS_N_INSNS (1), /* cost of an add instruction */
660 COSTS_N_INSNS (2), /* cost of a lea instruction */
661 COSTS_N_INSNS (1), /* variable shift costs */
662 COSTS_N_INSNS (1), /* constant shift costs */
663 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
664 COSTS_N_INSNS (5), /* HI */
665 COSTS_N_INSNS (5), /* SI */
666 COSTS_N_INSNS (5), /* DI */
667 COSTS_N_INSNS (5)}, /* other */
668 0, /* cost of multiply per each bit set */
669 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
670 COSTS_N_INSNS (26), /* HI */
671 COSTS_N_INSNS (42), /* SI */
672 COSTS_N_INSNS (74), /* DI */
673 COSTS_N_INSNS (74)}, /* other */
674 COSTS_N_INSNS (1), /* cost of movsx */
675 COSTS_N_INSNS (1), /* cost of movzx */
676 8, /* "large" insn */
677 9, /* MOVE_RATIO */
678 4, /* cost for loading QImode using movzbl */
679 {3, 4, 3}, /* cost of loading integer registers
680 in QImode, HImode and SImode.
681 Relative to reg-reg move (2). */
682 {3, 4, 3}, /* cost of storing integer registers */
683 4, /* cost of reg,reg fld/fst */
684 {4, 4, 12}, /* cost of loading fp registers
685 in SFmode, DFmode and XFmode */
686 {6, 6, 8}, /* cost of storing fp registers
687 in SFmode, DFmode and XFmode */
688 2, /* cost of moving MMX register */
689 {4, 4}, /* cost of loading MMX registers
690 in SImode and DImode */
691 {4, 4}, /* cost of storing MMX registers
692 in SImode and DImode */
693 2, /* cost of moving SSE register */
694 {4, 4, 6}, /* cost of loading SSE registers
695 in SImode, DImode and TImode */
696 {4, 4, 5}, /* cost of storing SSE registers
697 in SImode, DImode and TImode */
698 5, /* MMX or SSE register to integer */
699 64, /* size of l1 cache. */
700 256, /* size of l2 cache. */
701 64, /* size of prefetch block */
702 6, /* number of parallel prefetches */
703 5, /* Branch cost */
704 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
705 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
706 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
707 COSTS_N_INSNS (2), /* cost of FABS instruction. */
708 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
709 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
710 athlon_memcpy,
711 athlon_memset,
712 1, /* scalar_stmt_cost. */
713 1, /* scalar load_cost. */
714 1, /* scalar_store_cost. */
715 1, /* vec_stmt_cost. */
716 1, /* vec_to_scalar_cost. */
717 1, /* scalar_to_vec_cost. */
718 1, /* vec_align_load_cost. */
719 2, /* vec_unalign_load_cost. */
720 1, /* vec_store_cost. */
721 3, /* cond_taken_branch_cost. */
722 1, /* cond_not_taken_branch_cost. */
723 };
725 /* K8 has an optimized REP instruction for medium-sized blocks, but for very
726 small blocks it is better to use a loop. For large blocks, a libcall can
727 do nontemporal accesses and beat inlining considerably. */
728 static stringop_algs k8_memcpy[2] = {
729 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
730 {-1, rep_prefix_4_byte, false}}},
731 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
732 {-1, libcall, false}}}};
733 static stringop_algs k8_memset[2] = {
734 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
735 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
736 {libcall, {{48, unrolled_loop, false},
737 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
738 static const
739 struct processor_costs k8_cost = {
740 COSTS_N_INSNS (1), /* cost of an add instruction */
741 COSTS_N_INSNS (2), /* cost of a lea instruction */
742 COSTS_N_INSNS (1), /* variable shift costs */
743 COSTS_N_INSNS (1), /* constant shift costs */
744 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
745 COSTS_N_INSNS (4), /* HI */
746 COSTS_N_INSNS (3), /* SI */
747 COSTS_N_INSNS (4), /* DI */
748 COSTS_N_INSNS (5)}, /* other */
749 0, /* cost of multiply per each bit set */
750 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
751 COSTS_N_INSNS (26), /* HI */
752 COSTS_N_INSNS (42), /* SI */
753 COSTS_N_INSNS (74), /* DI */
754 COSTS_N_INSNS (74)}, /* other */
755 COSTS_N_INSNS (1), /* cost of movsx */
756 COSTS_N_INSNS (1), /* cost of movzx */
757 8, /* "large" insn */
758 9, /* MOVE_RATIO */
759 4, /* cost for loading QImode using movzbl */
760 {3, 4, 3}, /* cost of loading integer registers
761 in QImode, HImode and SImode.
762 Relative to reg-reg move (2). */
763 {3, 4, 3}, /* cost of storing integer registers */
764 4, /* cost of reg,reg fld/fst */
765 {4, 4, 12}, /* cost of loading fp registers
766 in SFmode, DFmode and XFmode */
767 {6, 6, 8}, /* cost of storing fp registers
768 in SFmode, DFmode and XFmode */
769 2, /* cost of moving MMX register */
770 {3, 3}, /* cost of loading MMX registers
771 in SImode and DImode */
772 {4, 4}, /* cost of storing MMX registers
773 in SImode and DImode */
774 2, /* cost of moving SSE register */
775 {4, 3, 6}, /* cost of loading SSE registers
776 in SImode, DImode and TImode */
777 {4, 4, 5}, /* cost of storing SSE registers
778 in SImode, DImode and TImode */
779 5, /* MMX or SSE register to integer */
780 64, /* size of l1 cache. */
781 512, /* size of l2 cache. */
782 64, /* size of prefetch block */
783 /* New AMD processors never drop prefetches; if they cannot be performed
784 immediately, they are queued. We set the number of simultaneous prefetches
785 to a large constant to reflect this (it is probably not a good idea to
786 leave the number of prefetches completely unlimited, as their execution
787 also takes some time). */
788 100, /* number of parallel prefetches */
789 3, /* Branch cost */
790 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
791 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
792 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
793 COSTS_N_INSNS (2), /* cost of FABS instruction. */
794 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
795 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
797 k8_memcpy,
798 k8_memset,
799 4, /* scalar_stmt_cost. */
800 2, /* scalar load_cost. */
801 2, /* scalar_store_cost. */
802 5, /* vec_stmt_cost. */
803 0, /* vec_to_scalar_cost. */
804 2, /* scalar_to_vec_cost. */
805 2, /* vec_align_load_cost. */
806 3, /* vec_unalign_load_cost. */
807 3, /* vec_store_cost. */
808 3, /* cond_taken_branch_cost. */
809 2, /* cond_not_taken_branch_cost. */
810 };
812 /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but for
813 very small blocks it is better to use a loop. For large blocks, a libcall
814 can do nontemporal accesses and beat inlining considerably. */
815 static stringop_algs amdfam10_memcpy[2] = {
816 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
817 {-1, rep_prefix_4_byte, false}}},
818 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
819 {-1, libcall, false}}}};
820 static stringop_algs amdfam10_memset[2] = {
821 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
822 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
823 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
824 {-1, libcall, false}}}};
825 struct processor_costs amdfam10_cost = {
826 COSTS_N_INSNS (1), /* cost of an add instruction */
827 COSTS_N_INSNS (2), /* cost of a lea instruction */
828 COSTS_N_INSNS (1), /* variable shift costs */
829 COSTS_N_INSNS (1), /* constant shift costs */
830 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
831 COSTS_N_INSNS (4), /* HI */
832 COSTS_N_INSNS (3), /* SI */
833 COSTS_N_INSNS (4), /* DI */
834 COSTS_N_INSNS (5)}, /* other */
835 0, /* cost of multiply per each bit set */
836 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
837 COSTS_N_INSNS (35), /* HI */
838 COSTS_N_INSNS (51), /* SI */
839 COSTS_N_INSNS (83), /* DI */
840 COSTS_N_INSNS (83)}, /* other */
841 COSTS_N_INSNS (1), /* cost of movsx */
842 COSTS_N_INSNS (1), /* cost of movzx */
843 8, /* "large" insn */
844 9, /* MOVE_RATIO */
845 4, /* cost for loading QImode using movzbl */
846 {3, 4, 3}, /* cost of loading integer registers
847 in QImode, HImode and SImode.
848 Relative to reg-reg move (2). */
849 {3, 4, 3}, /* cost of storing integer registers */
850 4, /* cost of reg,reg fld/fst */
851 {4, 4, 12}, /* cost of loading fp registers
852 in SFmode, DFmode and XFmode */
853 {6, 6, 8}, /* cost of storing fp registers
854 in SFmode, DFmode and XFmode */
855 2, /* cost of moving MMX register */
856 {3, 3}, /* cost of loading MMX registers
857 in SImode and DImode */
858 {4, 4}, /* cost of storing MMX registers
859 in SImode and DImode */
860 2, /* cost of moving SSE register */
861 {4, 4, 3}, /* cost of loading SSE registers
862 in SImode, DImode and TImode */
863 {4, 4, 5}, /* cost of storing SSE registers
864 in SImode, DImode and TImode */
865 3, /* MMX or SSE register to integer */
866 /* On K8:
867 MOVD reg64, xmmreg Double FSTORE 4
868 MOVD reg32, xmmreg Double FSTORE 4
869 On AMDFAM10:
870 MOVD reg64, xmmreg Double FADD 3
871 1/1 1/1
872 MOVD reg32, xmmreg Double FADD 3
873 1/1 1/1 */
874 64, /* size of l1 cache. */
875 512, /* size of l2 cache. */
876 64, /* size of prefetch block */
877 /* New AMD processors never drop prefetches; if they cannot be performed
878 immediately, they are queued. We set the number of simultaneous prefetches
879 to a large constant to reflect this (it is probably not a good idea to
880 leave the number of prefetches completely unlimited, as their execution
881 also takes some time). */
882 100, /* number of parallel prefetches */
883 2, /* Branch cost */
884 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
885 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
886 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
887 COSTS_N_INSNS (2), /* cost of FABS instruction. */
888 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
889 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
891 amdfam10_memcpy,
892 amdfam10_memset,
893 4, /* scalar_stmt_cost. */
894 2, /* scalar load_cost. */
895 2, /* scalar_store_cost. */
896 6, /* vec_stmt_cost. */
897 0, /* vec_to_scalar_cost. */
898 2, /* scalar_to_vec_cost. */
899 2, /* vec_align_load_cost. */
900 2, /* vec_unalign_load_cost. */
901 2, /* vec_store_cost. */
902 2, /* cond_taken_branch_cost. */
903 1, /* cond_not_taken_branch_cost. */
904 };
906 /* BDVER1 has an optimized REP instruction for medium-sized blocks, but for
907 very small blocks it is better to use a loop. For large blocks, a libcall
908 can do nontemporal accesses and beat inlining considerably. */
909 static stringop_algs bdver1_memcpy[2] = {
910 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
911 {-1, rep_prefix_4_byte, false}}},
912 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
913 {-1, libcall, false}}}};
914 static stringop_algs bdver1_memset[2] = {
915 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
916 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
917 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
918 {-1, libcall, false}}}};
920 const struct processor_costs bdver1_cost = {
921 COSTS_N_INSNS (1), /* cost of an add instruction */
922 COSTS_N_INSNS (1), /* cost of a lea instruction */
923 COSTS_N_INSNS (1), /* variable shift costs */
924 COSTS_N_INSNS (1), /* constant shift costs */
925 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
926 COSTS_N_INSNS (4), /* HI */
927 COSTS_N_INSNS (4), /* SI */
928 COSTS_N_INSNS (6), /* DI */
929 COSTS_N_INSNS (6)}, /* other */
930 0, /* cost of multiply per each bit set */
931 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
932 COSTS_N_INSNS (35), /* HI */
933 COSTS_N_INSNS (51), /* SI */
934 COSTS_N_INSNS (83), /* DI */
935 COSTS_N_INSNS (83)}, /* other */
936 COSTS_N_INSNS (1), /* cost of movsx */
937 COSTS_N_INSNS (1), /* cost of movzx */
938 8, /* "large" insn */
939 9, /* MOVE_RATIO */
940 4, /* cost for loading QImode using movzbl */
941 {5, 5, 4}, /* cost of loading integer registers
942 in QImode, HImode and SImode.
943 Relative to reg-reg move (2). */
944 {4, 4, 4}, /* cost of storing integer registers */
945 2, /* cost of reg,reg fld/fst */
946 {5, 5, 12}, /* cost of loading fp registers
947 in SFmode, DFmode and XFmode */
948 {4, 4, 8}, /* cost of storing fp registers
949 in SFmode, DFmode and XFmode */
950 2, /* cost of moving MMX register */
951 {4, 4}, /* cost of loading MMX registers
952 in SImode and DImode */
953 {4, 4}, /* cost of storing MMX registers
954 in SImode and DImode */
955 2, /* cost of moving SSE register */
956 {4, 4, 4}, /* cost of loading SSE registers
957 in SImode, DImode and TImode */
958 {4, 4, 4}, /* cost of storing SSE registers
959 in SImode, DImode and TImode */
960 2, /* MMX or SSE register to integer */
961 /* On K8:
962 MOVD reg64, xmmreg Double FSTORE 4
963 MOVD reg32, xmmreg Double FSTORE 4
964 On AMDFAM10:
965 MOVD reg64, xmmreg Double FADD 3
966 1/1 1/1
967 MOVD reg32, xmmreg Double FADD 3
968 1/1 1/1 */
969 16, /* size of l1 cache. */
970 2048, /* size of l2 cache. */
971 64, /* size of prefetch block */
972 /* New AMD processors never drop prefetches; if they cannot be performed
973 immediately, they are queued. We set the number of simultaneous prefetches
974 to a large constant to reflect this (it is probably not a good idea to
975 leave the number of prefetches completely unlimited, as their execution
976 also takes some time). */
977 100, /* number of parallel prefetches */
978 2, /* Branch cost */
979 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
980 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
981 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
982 COSTS_N_INSNS (2), /* cost of FABS instruction. */
983 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
984 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
986 bdver1_memcpy,
987 bdver1_memset,
988 6, /* scalar_stmt_cost. */
989 4, /* scalar load_cost. */
990 4, /* scalar_store_cost. */
991 6, /* vec_stmt_cost. */
992 0, /* vec_to_scalar_cost. */
993 2, /* scalar_to_vec_cost. */
994 4, /* vec_align_load_cost. */
995 4, /* vec_unalign_load_cost. */
996 4, /* vec_store_cost. */
997 2, /* cond_taken_branch_cost. */
998 1, /* cond_not_taken_branch_cost. */
999 };
1001 /* BDVER2 has an optimized REP instruction for medium-sized blocks, but for
1002 very small blocks it is better to use a loop. For large blocks, a libcall
1003 can do nontemporal accesses and beat inlining considerably. */
1005 static stringop_algs bdver2_memcpy[2] = {
1006 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1007 {-1, rep_prefix_4_byte, false}}},
1008 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1009 {-1, libcall, false}}}};
1010 static stringop_algs bdver2_memset[2] = {
1011 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1012 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1013 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1014 {-1, libcall, false}}}};
1016 const struct processor_costs bdver2_cost = {
1017 COSTS_N_INSNS (1), /* cost of an add instruction */
1018 COSTS_N_INSNS (1), /* cost of a lea instruction */
1019 COSTS_N_INSNS (1), /* variable shift costs */
1020 COSTS_N_INSNS (1), /* constant shift costs */
1021 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1022 COSTS_N_INSNS (4), /* HI */
1023 COSTS_N_INSNS (4), /* SI */
1024 COSTS_N_INSNS (6), /* DI */
1025 COSTS_N_INSNS (6)}, /* other */
1026 0, /* cost of multiply per each bit set */
1027 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1028 COSTS_N_INSNS (35), /* HI */
1029 COSTS_N_INSNS (51), /* SI */
1030 COSTS_N_INSNS (83), /* DI */
1031 COSTS_N_INSNS (83)}, /* other */
1032 COSTS_N_INSNS (1), /* cost of movsx */
1033 COSTS_N_INSNS (1), /* cost of movzx */
1034 8, /* "large" insn */
1035 9, /* MOVE_RATIO */
1036 4, /* cost for loading QImode using movzbl */
1037 {5, 5, 4}, /* cost of loading integer registers
1038 in QImode, HImode and SImode.
1039 Relative to reg-reg move (2). */
1040 {4, 4, 4}, /* cost of storing integer registers */
1041 2, /* cost of reg,reg fld/fst */
1042 {5, 5, 12}, /* cost of loading fp registers
1043 in SFmode, DFmode and XFmode */
1044 {4, 4, 8}, /* cost of storing fp registers
1045 in SFmode, DFmode and XFmode */
1046 2, /* cost of moving MMX register */
1047 {4, 4}, /* cost of loading MMX registers
1048 in SImode and DImode */
1049 {4, 4}, /* cost of storing MMX registers
1050 in SImode and DImode */
1051 2, /* cost of moving SSE register */
1052 {4, 4, 4}, /* cost of loading SSE registers
1053 in SImode, DImode and TImode */
1054 {4, 4, 4}, /* cost of storing SSE registers
1055 in SImode, DImode and TImode */
1056 2, /* MMX or SSE register to integer */
1057 /* On K8:
1058 MOVD reg64, xmmreg Double FSTORE 4
1059 MOVD reg32, xmmreg Double FSTORE 4
1060 On AMDFAM10:
1061 MOVD reg64, xmmreg Double FADD 3
1062 1/1 1/1
1063 MOVD reg32, xmmreg Double FADD 3
1064 1/1 1/1 */
1065 16, /* size of l1 cache. */
1066 2048, /* size of l2 cache. */
1067 64, /* size of prefetch block */
1068 /* New AMD processors never drop prefetches; if they cannot be performed
1069 immediately, they are queued. We set the number of simultaneous prefetches
1070 to a large constant to reflect this (it is probably not a good idea to
1071 leave the number of prefetches completely unlimited, as their execution
1072 also takes some time). */
1073 100, /* number of parallel prefetches */
1074 2, /* Branch cost */
1075 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1076 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1077 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1078 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1079 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1080 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1082 bdver2_memcpy,
1083 bdver2_memset,
1084 6, /* scalar_stmt_cost. */
1085 4, /* scalar load_cost. */
1086 4, /* scalar_store_cost. */
1087 6, /* vec_stmt_cost. */
1088 0, /* vec_to_scalar_cost. */
1089 2, /* scalar_to_vec_cost. */
1090 4, /* vec_align_load_cost. */
1091 4, /* vec_unalign_load_cost. */
1092 4, /* vec_store_cost. */
1093 2, /* cond_taken_branch_cost. */
1094 1, /* cond_not_taken_branch_cost. */
1095 };
1098 /* BDVER3 has an optimized REP instruction for medium-sized blocks, but for
1099 very small blocks it is better to use a loop. For large blocks, a libcall
1100 can do nontemporal accesses and beat inlining considerably. */
1101 static stringop_algs bdver3_memcpy[2] = {
1102 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1103 {-1, rep_prefix_4_byte, false}}},
1104 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1105 {-1, libcall, false}}}};
1106 static stringop_algs bdver3_memset[2] = {
1107 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1108 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1109 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1110 {-1, libcall, false}}}};
1111 struct processor_costs bdver3_cost = {
1112 COSTS_N_INSNS (1), /* cost of an add instruction */
1113 COSTS_N_INSNS (1), /* cost of a lea instruction */
1114 COSTS_N_INSNS (1), /* variable shift costs */
1115 COSTS_N_INSNS (1), /* constant shift costs */
1116 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1117 COSTS_N_INSNS (4), /* HI */
1118 COSTS_N_INSNS (4), /* SI */
1119 COSTS_N_INSNS (6), /* DI */
1120 COSTS_N_INSNS (6)}, /* other */
1121 0, /* cost of multiply per each bit set */
1122 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1123 COSTS_N_INSNS (35), /* HI */
1124 COSTS_N_INSNS (51), /* SI */
1125 COSTS_N_INSNS (83), /* DI */
1126 COSTS_N_INSNS (83)}, /* other */
1127 COSTS_N_INSNS (1), /* cost of movsx */
1128 COSTS_N_INSNS (1), /* cost of movzx */
1129 8, /* "large" insn */
1130 9, /* MOVE_RATIO */
1131 4, /* cost for loading QImode using movzbl */
1132 {5, 5, 4}, /* cost of loading integer registers
1133 in QImode, HImode and SImode.
1134 Relative to reg-reg move (2). */
1135 {4, 4, 4}, /* cost of storing integer registers */
1136 2, /* cost of reg,reg fld/fst */
1137 {5, 5, 12}, /* cost of loading fp registers
1138 in SFmode, DFmode and XFmode */
1139 {4, 4, 8}, /* cost of storing fp registers
1140 in SFmode, DFmode and XFmode */
1141 2, /* cost of moving MMX register */
1142 {4, 4}, /* cost of loading MMX registers
1143 in SImode and DImode */
1144 {4, 4}, /* cost of storing MMX registers
1145 in SImode and DImode */
1146 2, /* cost of moving SSE register */
1147 {4, 4, 4}, /* cost of loading SSE registers
1148 in SImode, DImode and TImode */
1149 {4, 4, 4}, /* cost of storing SSE registers
1150 in SImode, DImode and TImode */
1151 2, /* MMX or SSE register to integer */
1152 16, /* size of l1 cache. */
1153 2048, /* size of l2 cache. */
1154 64, /* size of prefetch block */
1155 /* New AMD processors never drop prefetches; if they cannot be performed
1156 immediately, they are queued. We set the number of simultaneous prefetches
1157 to a large constant to reflect this (it is probably not a good idea to
1158 leave the number of prefetches completely unlimited, as their execution
1159 also takes some time). */
1160 100, /* number of parallel prefetches */
1161 2, /* Branch cost */
1162 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1163 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1164 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1165 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1166 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1167 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1169 bdver3_memcpy,
1170 bdver3_memset,
1171 6, /* scalar_stmt_cost. */
1172 4, /* scalar load_cost. */
1173 4, /* scalar_store_cost. */
1174 6, /* vec_stmt_cost. */
1175 0, /* vec_to_scalar_cost. */
1176 2, /* scalar_to_vec_cost. */
1177 4, /* vec_align_load_cost. */
1178 4, /* vec_unalign_load_cost. */
1179 4, /* vec_store_cost. */
1180 2, /* cond_taken_branch_cost. */
1181 1, /* cond_not_taken_branch_cost. */
1182 };
1184 /* BDVER4 has an optimized REP instruction for medium-sized blocks, but for
1185 very small blocks it is better to use a loop. For large blocks, a libcall
1186 can do nontemporal accesses and beat inlining considerably. */
1187 static stringop_algs bdver4_memcpy[2] = {
1188 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1189 {-1, rep_prefix_4_byte, false}}},
1190 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1191 {-1, libcall, false}}}};
1192 static stringop_algs bdver4_memset[2] = {
1193 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1194 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1195 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1196 {-1, libcall, false}}}};
1197 struct processor_costs bdver4_cost = {
1198 COSTS_N_INSNS (1), /* cost of an add instruction */
1199 COSTS_N_INSNS (1), /* cost of a lea instruction */
1200 COSTS_N_INSNS (1), /* variable shift costs */
1201 COSTS_N_INSNS (1), /* constant shift costs */
1202 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1203 COSTS_N_INSNS (4), /* HI */
1204 COSTS_N_INSNS (4), /* SI */
1205 COSTS_N_INSNS (6), /* DI */
1206 COSTS_N_INSNS (6)}, /* other */
1207 0, /* cost of multiply per each bit set */
1208 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1209 COSTS_N_INSNS (35), /* HI */
1210 COSTS_N_INSNS (51), /* SI */
1211 COSTS_N_INSNS (83), /* DI */
1212 COSTS_N_INSNS (83)}, /* other */
1213 COSTS_N_INSNS (1), /* cost of movsx */
1214 COSTS_N_INSNS (1), /* cost of movzx */
1215 8, /* "large" insn */
1216 9, /* MOVE_RATIO */
1217 4, /* cost for loading QImode using movzbl */
1218 {5, 5, 4}, /* cost of loading integer registers
1219 in QImode, HImode and SImode.
1220 Relative to reg-reg move (2). */
1221 {4, 4, 4}, /* cost of storing integer registers */
1222 2, /* cost of reg,reg fld/fst */
1223 {5, 5, 12}, /* cost of loading fp registers
1224 in SFmode, DFmode and XFmode */
1225 {4, 4, 8}, /* cost of storing fp registers
1226 in SFmode, DFmode and XFmode */
1227 2, /* cost of moving MMX register */
1228 {4, 4}, /* cost of loading MMX registers
1229 in SImode and DImode */
1230 {4, 4}, /* cost of storing MMX registers
1231 in SImode and DImode */
1232 2, /* cost of moving SSE register */
1233 {4, 4, 4}, /* cost of loading SSE registers
1234 in SImode, DImode and TImode */
1235 {4, 4, 4}, /* cost of storing SSE registers
1236 in SImode, DImode and TImode */
1237 2, /* MMX or SSE register to integer */
1238 16, /* size of l1 cache. */
1239 2048, /* size of l2 cache. */
1240 64, /* size of prefetch block */
1241 /* New AMD processors never drop prefetches; if they cannot be performed
1242 immediately, they are queued. We set the number of simultaneous prefetches
1243 to a large constant to reflect this (it is probably not a good idea to
1244 leave the number of prefetches completely unlimited, as their execution
1245 also takes some time). */
1246 100, /* number of parallel prefetches */
1247 2, /* Branch cost */
1248 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1249 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1250 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1251 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1252 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1253 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1255 bdver4_memcpy,
1256 bdver4_memset,
1257 6, /* scalar_stmt_cost. */
1258 4, /* scalar load_cost. */
1259 4, /* scalar_store_cost. */
1260 6, /* vec_stmt_cost. */
1261 0, /* vec_to_scalar_cost. */
1262 2, /* scalar_to_vec_cost. */
1263 4, /* vec_align_load_cost. */
1264 4, /* vec_unalign_load_cost. */
1265 4, /* vec_store_cost. */
1266 2, /* cond_taken_branch_cost. */
1267 1, /* cond_not_taken_branch_cost. */
1268 };
1270 /* BTVER1 has an optimized REP instruction for medium-sized blocks, but for
1271 very small blocks it is better to use a loop. For large blocks, a libcall
1272 can do nontemporal accesses and beat inlining considerably. */
1273 static stringop_algs btver1_memcpy[2] = {
1274 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1275 {-1, rep_prefix_4_byte, false}}},
1276 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1277 {-1, libcall, false}}}};
1278 static stringop_algs btver1_memset[2] = {
1279 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1280 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1281 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1282 {-1, libcall, false}}}};
1283 const struct processor_costs btver1_cost = {
1284 COSTS_N_INSNS (1), /* cost of an add instruction */
1285 COSTS_N_INSNS (2), /* cost of a lea instruction */
1286 COSTS_N_INSNS (1), /* variable shift costs */
1287 COSTS_N_INSNS (1), /* constant shift costs */
1288 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1289 COSTS_N_INSNS (4), /* HI */
1290 COSTS_N_INSNS (3), /* SI */
1291 COSTS_N_INSNS (4), /* DI */
1292 COSTS_N_INSNS (5)}, /* other */
1293 0, /* cost of multiply per each bit set */
1294 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1295 COSTS_N_INSNS (35), /* HI */
1296 COSTS_N_INSNS (51), /* SI */
1297 COSTS_N_INSNS (83), /* DI */
1298 COSTS_N_INSNS (83)}, /* other */
1299 COSTS_N_INSNS (1), /* cost of movsx */
1300 COSTS_N_INSNS (1), /* cost of movzx */
1301 8, /* "large" insn */
1302 9, /* MOVE_RATIO */
1303 4, /* cost for loading QImode using movzbl */
1304 {3, 4, 3}, /* cost of loading integer registers
1305 in QImode, HImode and SImode.
1306 Relative to reg-reg move (2). */
1307 {3, 4, 3}, /* cost of storing integer registers */
1308 4, /* cost of reg,reg fld/fst */
1309 {4, 4, 12}, /* cost of loading fp registers
1310 in SFmode, DFmode and XFmode */
1311 {6, 6, 8}, /* cost of storing fp registers
1312 in SFmode, DFmode and XFmode */
1313 2, /* cost of moving MMX register */
1314 {3, 3}, /* cost of loading MMX registers
1315 in SImode and DImode */
1316 {4, 4}, /* cost of storing MMX registers
1317 in SImode and DImode */
1318 2, /* cost of moving SSE register */
1319 {4, 4, 3}, /* cost of loading SSE registers
1320 in SImode, DImode and TImode */
1321 {4, 4, 5}, /* cost of storing SSE registers
1322 in SImode, DImode and TImode */
1323 3, /* MMX or SSE register to integer */
1324 /* On K8:
1325 MOVD reg64, xmmreg Double FSTORE 4
1326 MOVD reg32, xmmreg Double FSTORE 4
1327 On AMDFAM10:
1328 MOVD reg64, xmmreg Double FADD 3
1329 1/1 1/1
1330 MOVD reg32, xmmreg Double FADD 3
1331 1/1 1/1 */
1332 32, /* size of l1 cache. */
1333 512, /* size of l2 cache. */
1334 64, /* size of prefetch block */
1335 100, /* number of parallel prefetches */
1336 2, /* Branch cost */
1337 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1338 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1339 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1340 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1341 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1342 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1344 btver1_memcpy,
1345 btver1_memset,
1346 4, /* scalar_stmt_cost. */
1347 2, /* scalar load_cost. */
1348 2, /* scalar_store_cost. */
1349 6, /* vec_stmt_cost. */
1350 0, /* vec_to_scalar_cost. */
1351 2, /* scalar_to_vec_cost. */
1352 2, /* vec_align_load_cost. */
1353 2, /* vec_unalign_load_cost. */
1354 2, /* vec_store_cost. */
1355 2, /* cond_taken_branch_cost. */
1356 1, /* cond_not_taken_branch_cost. */
1357 };
1359 static stringop_algs btver2_memcpy[2] = {
1360 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1361 {-1, rep_prefix_4_byte, false}}},
1362 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1363 {-1, libcall, false}}}};
1364 static stringop_algs btver2_memset[2] = {
1365 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1366 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1367 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1368 {-1, libcall, false}}}};
1369 const struct processor_costs btver2_cost = {
1370 COSTS_N_INSNS (1), /* cost of an add instruction */
1371 COSTS_N_INSNS (2), /* cost of a lea instruction */
1372 COSTS_N_INSNS (1), /* variable shift costs */
1373 COSTS_N_INSNS (1), /* constant shift costs */
1374 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1375 COSTS_N_INSNS (4), /* HI */
1376 COSTS_N_INSNS (3), /* SI */
1377 COSTS_N_INSNS (4), /* DI */
1378 COSTS_N_INSNS (5)}, /* other */
1379 0, /* cost of multiply per each bit set */
1380 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1381 COSTS_N_INSNS (35), /* HI */
1382 COSTS_N_INSNS (51), /* SI */
1383 COSTS_N_INSNS (83), /* DI */
1384 COSTS_N_INSNS (83)}, /* other */
1385 COSTS_N_INSNS (1), /* cost of movsx */
1386 COSTS_N_INSNS (1), /* cost of movzx */
1387 8, /* "large" insn */
1388 9, /* MOVE_RATIO */
1389 4, /* cost for loading QImode using movzbl */
1390 {3, 4, 3}, /* cost of loading integer registers
1391 in QImode, HImode and SImode.
1392 Relative to reg-reg move (2). */
1393 {3, 4, 3}, /* cost of storing integer registers */
1394 4, /* cost of reg,reg fld/fst */
1395 {4, 4, 12}, /* cost of loading fp registers
1396 in SFmode, DFmode and XFmode */
1397 {6, 6, 8}, /* cost of storing fp registers
1398 in SFmode, DFmode and XFmode */
1399 2, /* cost of moving MMX register */
1400 {3, 3}, /* cost of loading MMX registers
1401 in SImode and DImode */
1402 {4, 4}, /* cost of storing MMX registers
1403 in SImode and DImode */
1404 2, /* cost of moving SSE register */
1405 {4, 4, 3}, /* cost of loading SSE registers
1406 in SImode, DImode and TImode */
1407 {4, 4, 5}, /* cost of storing SSE registers
1408 in SImode, DImode and TImode */
1409 3, /* MMX or SSE register to integer */
1410 /* On K8:
1411 MOVD reg64, xmmreg Double FSTORE 4
1412 MOVD reg32, xmmreg Double FSTORE 4
1413 On AMDFAM10:
1414 MOVD reg64, xmmreg Double FADD 3
1415 1/1 1/1
1416 MOVD reg32, xmmreg Double FADD 3
1417 1/1 1/1 */
1418 32, /* size of l1 cache. */
1419 2048, /* size of l2 cache. */
1420 64, /* size of prefetch block */
1421 100, /* number of parallel prefetches */
1422 2, /* Branch cost */
1423 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1424 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1425 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1426 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1427 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1428 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1429 btver2_memcpy,
1430 btver2_memset,
1431 4, /* scalar_stmt_cost. */
1432 2, /* scalar_load_cost. */
1433 2, /* scalar_store_cost. */
1434 6, /* vec_stmt_cost. */
1435 0, /* vec_to_scalar_cost. */
1436 2, /* scalar_to_vec_cost. */
1437 2, /* vec_align_load_cost. */
1438 2, /* vec_unalign_load_cost. */
1439 2, /* vec_store_cost. */
1440 2, /* cond_taken_branch_cost. */
1441 1, /* cond_not_taken_branch_cost. */
1444 static stringop_algs pentium4_memcpy[2] = {
1445 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1446 DUMMY_STRINGOP_ALGS};
1447 static stringop_algs pentium4_memset[2] = {
1448 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1449 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1450 DUMMY_STRINGOP_ALGS};
1452 static const
1453 struct processor_costs pentium4_cost = {
1454 COSTS_N_INSNS (1), /* cost of an add instruction */
1455 COSTS_N_INSNS (3), /* cost of a lea instruction */
1456 COSTS_N_INSNS (4), /* variable shift costs */
1457 COSTS_N_INSNS (4), /* constant shift costs */
1458 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1459 COSTS_N_INSNS (15), /* HI */
1460 COSTS_N_INSNS (15), /* SI */
1461 COSTS_N_INSNS (15), /* DI */
1462 COSTS_N_INSNS (15)}, /* other */
1463 0, /* cost of multiply per each bit set */
1464 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1465 COSTS_N_INSNS (56), /* HI */
1466 COSTS_N_INSNS (56), /* SI */
1467 COSTS_N_INSNS (56), /* DI */
1468 COSTS_N_INSNS (56)}, /* other */
1469 COSTS_N_INSNS (1), /* cost of movsx */
1470 COSTS_N_INSNS (1), /* cost of movzx */
1471 16, /* "large" insn */
1472 6, /* MOVE_RATIO */
1473 2, /* cost for loading QImode using movzbl */
1474 {4, 5, 4}, /* cost of loading integer registers
1475 in QImode, HImode and SImode.
1476 Relative to reg-reg move (2). */
1477 {2, 3, 2}, /* cost of storing integer registers */
1478 2, /* cost of reg,reg fld/fst */
1479 {2, 2, 6}, /* cost of loading fp registers
1480 in SFmode, DFmode and XFmode */
1481 {4, 4, 6}, /* cost of storing fp registers
1482 in SFmode, DFmode and XFmode */
1483 2, /* cost of moving MMX register */
1484 {2, 2}, /* cost of loading MMX registers
1485 in SImode and DImode */
1486 {2, 2}, /* cost of storing MMX registers
1487 in SImode and DImode */
1488 12, /* cost of moving SSE register */
1489 {12, 12, 12}, /* cost of loading SSE registers
1490 in SImode, DImode and TImode */
1491 {2, 2, 8}, /* cost of storing SSE registers
1492 in SImode, DImode and TImode */
1493 10, /* MMX or SSE register to integer */
1494 8, /* size of l1 cache. */
1495 256, /* size of l2 cache. */
1496 64, /* size of prefetch block */
1497 6, /* number of parallel prefetches */
1498 2, /* Branch cost */
1499 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1500 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1501 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1502 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1503 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1504 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1505 pentium4_memcpy,
1506 pentium4_memset,
1507 1, /* scalar_stmt_cost. */
1508 1, /* scalar_load_cost. */
1509 1, /* scalar_store_cost. */
1510 1, /* vec_stmt_cost. */
1511 1, /* vec_to_scalar_cost. */
1512 1, /* scalar_to_vec_cost. */
1513 1, /* vec_align_load_cost. */
1514 2, /* vec_unalign_load_cost. */
1515 1, /* vec_store_cost. */
1516 3, /* cond_taken_branch_cost. */
1517 1, /* cond_not_taken_branch_cost. */
1520 static stringop_algs nocona_memcpy[2] = {
1521 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1522 {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
1523 {100000, unrolled_loop, false}, {-1, libcall, false}}}};
1525 static stringop_algs nocona_memset[2] = {
1526 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1527 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1528 {libcall, {{24, loop, false}, {64, unrolled_loop, false},
1529 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1531 static const
1532 struct processor_costs nocona_cost = {
1533 COSTS_N_INSNS (1), /* cost of an add instruction */
1534 COSTS_N_INSNS (1), /* cost of a lea instruction */
1535 COSTS_N_INSNS (1), /* variable shift costs */
1536 COSTS_N_INSNS (1), /* constant shift costs */
1537 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1538 COSTS_N_INSNS (10), /* HI */
1539 COSTS_N_INSNS (10), /* SI */
1540 COSTS_N_INSNS (10), /* DI */
1541 COSTS_N_INSNS (10)}, /* other */
1542 0, /* cost of multiply per each bit set */
1543 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1544 COSTS_N_INSNS (66), /* HI */
1545 COSTS_N_INSNS (66), /* SI */
1546 COSTS_N_INSNS (66), /* DI */
1547 COSTS_N_INSNS (66)}, /* other */
1548 COSTS_N_INSNS (1), /* cost of movsx */
1549 COSTS_N_INSNS (1), /* cost of movzx */
1550 16, /* "large" insn */
1551 17, /* MOVE_RATIO */
1552 4, /* cost for loading QImode using movzbl */
1553 {4, 4, 4}, /* cost of loading integer registers
1554 in QImode, HImode and SImode.
1555 Relative to reg-reg move (2). */
1556 {4, 4, 4}, /* cost of storing integer registers */
1557 3, /* cost of reg,reg fld/fst */
1558 {12, 12, 12}, /* cost of loading fp registers
1559 in SFmode, DFmode and XFmode */
1560 {4, 4, 4}, /* cost of storing fp registers
1561 in SFmode, DFmode and XFmode */
1562 6, /* cost of moving MMX register */
1563 {12, 12}, /* cost of loading MMX registers
1564 in SImode and DImode */
1565 {12, 12}, /* cost of storing MMX registers
1566 in SImode and DImode */
1567 6, /* cost of moving SSE register */
1568 {12, 12, 12}, /* cost of loading SSE registers
1569 in SImode, DImode and TImode */
1570 {12, 12, 12}, /* cost of storing SSE registers
1571 in SImode, DImode and TImode */
1572 8, /* MMX or SSE register to integer */
1573 8, /* size of l1 cache. */
1574 1024, /* size of l2 cache. */
1575 64, /* size of prefetch block */
1576 8, /* number of parallel prefetches */
1577 1, /* Branch cost */
1578 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1579 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1580 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1581 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1582 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1583 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1584 nocona_memcpy,
1585 nocona_memset,
1586 1, /* scalar_stmt_cost. */
1587 1, /* scalar_load_cost. */
1588 1, /* scalar_store_cost. */
1589 1, /* vec_stmt_cost. */
1590 1, /* vec_to_scalar_cost. */
1591 1, /* scalar_to_vec_cost. */
1592 1, /* vec_align_load_cost. */
1593 2, /* vec_unalign_load_cost. */
1594 1, /* vec_store_cost. */
1595 3, /* cond_taken_branch_cost. */
1596 1, /* cond_not_taken_branch_cost. */
1599 static stringop_algs atom_memcpy[2] = {
1600 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1601 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1602 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1603 static stringop_algs atom_memset[2] = {
1604 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1605 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1606 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1607 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1608 static const
1609 struct processor_costs atom_cost = {
1610 COSTS_N_INSNS (1), /* cost of an add instruction */
1611 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1612 COSTS_N_INSNS (1), /* variable shift costs */
1613 COSTS_N_INSNS (1), /* constant shift costs */
1614 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1615 COSTS_N_INSNS (4), /* HI */
1616 COSTS_N_INSNS (3), /* SI */
1617 COSTS_N_INSNS (4), /* DI */
1618 COSTS_N_INSNS (2)}, /* other */
1619 0, /* cost of multiply per each bit set */
1620 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1621 COSTS_N_INSNS (26), /* HI */
1622 COSTS_N_INSNS (42), /* SI */
1623 COSTS_N_INSNS (74), /* DI */
1624 COSTS_N_INSNS (74)}, /* other */
1625 COSTS_N_INSNS (1), /* cost of movsx */
1626 COSTS_N_INSNS (1), /* cost of movzx */
1627 8, /* "large" insn */
1628 17, /* MOVE_RATIO */
1629 4, /* cost for loading QImode using movzbl */
1630 {4, 4, 4}, /* cost of loading integer registers
1631 in QImode, HImode and SImode.
1632 Relative to reg-reg move (2). */
1633 {4, 4, 4}, /* cost of storing integer registers */
1634 4, /* cost of reg,reg fld/fst */
1635 {12, 12, 12}, /* cost of loading fp registers
1636 in SFmode, DFmode and XFmode */
1637 {6, 6, 8}, /* cost of storing fp registers
1638 in SFmode, DFmode and XFmode */
1639 2, /* cost of moving MMX register */
1640 {8, 8}, /* cost of loading MMX registers
1641 in SImode and DImode */
1642 {8, 8}, /* cost of storing MMX registers
1643 in SImode and DImode */
1644 2, /* cost of moving SSE register */
1645 {8, 8, 8}, /* cost of loading SSE registers
1646 in SImode, DImode and TImode */
1647 {8, 8, 8}, /* cost of storing SSE registers
1648 in SImode, DImode and TImode */
1649 5, /* MMX or SSE register to integer */
1650 32, /* size of l1 cache. */
1651 256, /* size of l2 cache. */
1652 64, /* size of prefetch block */
1653 6, /* number of parallel prefetches */
1654 3, /* Branch cost */
1655 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1656 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1657 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1658 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1659 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1660 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1661 atom_memcpy,
1662 atom_memset,
1663 1, /* scalar_stmt_cost. */
1664 1, /* scalar_load_cost. */
1665 1, /* scalar_store_cost. */
1666 1, /* vec_stmt_cost. */
1667 1, /* vec_to_scalar_cost. */
1668 1, /* scalar_to_vec_cost. */
1669 1, /* vec_align_load_cost. */
1670 2, /* vec_unalign_load_cost. */
1671 1, /* vec_store_cost. */
1672 3, /* cond_taken_branch_cost. */
1673 1, /* cond_not_taken_branch_cost. */
1676 static stringop_algs slm_memcpy[2] = {
1677 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1678 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1679 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1680 static stringop_algs slm_memset[2] = {
1681 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1682 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1683 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1684 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1685 static const
1686 struct processor_costs slm_cost = {
1687 COSTS_N_INSNS (1), /* cost of an add instruction */
1688 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1689 COSTS_N_INSNS (1), /* variable shift costs */
1690 COSTS_N_INSNS (1), /* constant shift costs */
1691 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1692 COSTS_N_INSNS (3), /* HI */
1693 COSTS_N_INSNS (3), /* SI */
1694 COSTS_N_INSNS (4), /* DI */
1695 COSTS_N_INSNS (2)}, /* other */
1696 0, /* cost of multiply per each bit set */
1697 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1698 COSTS_N_INSNS (26), /* HI */
1699 COSTS_N_INSNS (42), /* SI */
1700 COSTS_N_INSNS (74), /* DI */
1701 COSTS_N_INSNS (74)}, /* other */
1702 COSTS_N_INSNS (1), /* cost of movsx */
1703 COSTS_N_INSNS (1), /* cost of movzx */
1704 8, /* "large" insn */
1705 17, /* MOVE_RATIO */
1706 4, /* cost for loading QImode using movzbl */
1707 {4, 4, 4}, /* cost of loading integer registers
1708 in QImode, HImode and SImode.
1709 Relative to reg-reg move (2). */
1710 {4, 4, 4}, /* cost of storing integer registers */
1711 4, /* cost of reg,reg fld/fst */
1712 {12, 12, 12}, /* cost of loading fp registers
1713 in SFmode, DFmode and XFmode */
1714 {6, 6, 8}, /* cost of storing fp registers
1715 in SFmode, DFmode and XFmode */
1716 2, /* cost of moving MMX register */
1717 {8, 8}, /* cost of loading MMX registers
1718 in SImode and DImode */
1719 {8, 8}, /* cost of storing MMX registers
1720 in SImode and DImode */
1721 2, /* cost of moving SSE register */
1722 {8, 8, 8}, /* cost of loading SSE registers
1723 in SImode, DImode and TImode */
1724 {8, 8, 8}, /* cost of storing SSE registers
1725 in SImode, DImode and TImode */
1726 5, /* MMX or SSE register to integer */
1727 32, /* size of l1 cache. */
1728 256, /* size of l2 cache. */
1729 64, /* size of prefetch block */
1730 6, /* number of parallel prefetches */
1731 3, /* Branch cost */
1732 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1733 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1734 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1735 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1736 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1737 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1738 slm_memcpy,
1739 slm_memset,
1740 1, /* scalar_stmt_cost. */
1741 1, /* scalar_load_cost. */
1742 1, /* scalar_store_cost. */
1743 1, /* vec_stmt_cost. */
1744 1, /* vec_to_scalar_cost. */
1745 1, /* scalar_to_vec_cost. */
1746 1, /* vec_align_load_cost. */
1747 2, /* vec_unalign_load_cost. */
1748 1, /* vec_store_cost. */
1749 3, /* cond_taken_branch_cost. */
1750 1, /* cond_not_taken_branch_cost. */
1753 static stringop_algs intel_memcpy[2] = {
1754 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1755 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1756 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1757 static stringop_algs intel_memset[2] = {
1758 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1759 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1760 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1761 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1762 static const
1763 struct processor_costs intel_cost = {
1764 COSTS_N_INSNS (1), /* cost of an add instruction */
1765 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1766 COSTS_N_INSNS (1), /* variable shift costs */
1767 COSTS_N_INSNS (1), /* constant shift costs */
1768 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1769 COSTS_N_INSNS (3), /* HI */
1770 COSTS_N_INSNS (3), /* SI */
1771 COSTS_N_INSNS (4), /* DI */
1772 COSTS_N_INSNS (2)}, /* other */
1773 0, /* cost of multiply per each bit set */
1774 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1775 COSTS_N_INSNS (26), /* HI */
1776 COSTS_N_INSNS (42), /* SI */
1777 COSTS_N_INSNS (74), /* DI */
1778 COSTS_N_INSNS (74)}, /* other */
1779 COSTS_N_INSNS (1), /* cost of movsx */
1780 COSTS_N_INSNS (1), /* cost of movzx */
1781 8, /* "large" insn */
1782 17, /* MOVE_RATIO */
1783 4, /* cost for loading QImode using movzbl */
1784 {4, 4, 4}, /* cost of loading integer registers
1785 in QImode, HImode and SImode.
1786 Relative to reg-reg move (2). */
1787 {4, 4, 4}, /* cost of storing integer registers */
1788 4, /* cost of reg,reg fld/fst */
1789 {12, 12, 12}, /* cost of loading fp registers
1790 in SFmode, DFmode and XFmode */
1791 {6, 6, 8}, /* cost of storing fp registers
1792 in SFmode, DFmode and XFmode */
1793 2, /* cost of moving MMX register */
1794 {8, 8}, /* cost of loading MMX registers
1795 in SImode and DImode */
1796 {8, 8}, /* cost of storing MMX registers
1797 in SImode and DImode */
1798 2, /* cost of moving SSE register */
1799 {8, 8, 8}, /* cost of loading SSE registers
1800 in SImode, DImode and TImode */
1801 {8, 8, 8}, /* cost of storing SSE registers
1802 in SImode, DImode and TImode */
1803 5, /* MMX or SSE register to integer */
1804 32, /* size of l1 cache. */
1805 256, /* size of l2 cache. */
1806 64, /* size of prefetch block */
1807 6, /* number of parallel prefetches */
1808 3, /* Branch cost */
1809 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1810 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1811 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1812 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1813 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1814 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1815 intel_memcpy,
1816 intel_memset,
1817 1, /* scalar_stmt_cost. */
1818 1, /* scalar_load_cost. */
1819 1, /* scalar_store_cost. */
1820 1, /* vec_stmt_cost. */
1821 1, /* vec_to_scalar_cost. */
1822 1, /* scalar_to_vec_cost. */
1823 1, /* vec_align_load_cost. */
1824 2, /* vec_unalign_load_cost. */
1825 1, /* vec_store_cost. */
1826 3, /* cond_taken_branch_cost. */
1827 1, /* cond_not_taken_branch_cost. */
1830 /* Generic should produce code tuned for Core-i7 (and newer chips)
1831 and btver1 (and newer chips). */
1833 static stringop_algs generic_memcpy[2] = {
1834 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1835 {-1, libcall, false}}},
1836 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1837 {-1, libcall, false}}}};
1838 static stringop_algs generic_memset[2] = {
1839 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1840 {-1, libcall, false}}},
1841 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1842 {-1, libcall, false}}}};
1843 static const
1844 struct processor_costs generic_cost = {
1845 COSTS_N_INSNS (1), /* cost of an add instruction */
1846 /* On all chips taken into consideration, lea is 2 cycles or more. With
1847 this cost, however, our current implementation of synth_mult results in
1848 the use of unnecessary temporary registers, causing regressions on several
1849 SPECfp benchmarks. */
1850 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1851 COSTS_N_INSNS (1), /* variable shift costs */
1852 COSTS_N_INSNS (1), /* constant shift costs */
1853 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1854 COSTS_N_INSNS (4), /* HI */
1855 COSTS_N_INSNS (3), /* SI */
1856 COSTS_N_INSNS (4), /* DI */
1857 COSTS_N_INSNS (2)}, /* other */
1858 0, /* cost of multiply per each bit set */
1859 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1860 COSTS_N_INSNS (26), /* HI */
1861 COSTS_N_INSNS (42), /* SI */
1862 COSTS_N_INSNS (74), /* DI */
1863 COSTS_N_INSNS (74)}, /* other */
1864 COSTS_N_INSNS (1), /* cost of movsx */
1865 COSTS_N_INSNS (1), /* cost of movzx */
1866 8, /* "large" insn */
1867 17, /* MOVE_RATIO */
1868 4, /* cost for loading QImode using movzbl */
1869 {4, 4, 4}, /* cost of loading integer registers
1870 in QImode, HImode and SImode.
1871 Relative to reg-reg move (2). */
1872 {4, 4, 4}, /* cost of storing integer registers */
1873 4, /* cost of reg,reg fld/fst */
1874 {12, 12, 12}, /* cost of loading fp registers
1875 in SFmode, DFmode and XFmode */
1876 {6, 6, 8}, /* cost of storing fp registers
1877 in SFmode, DFmode and XFmode */
1878 2, /* cost of moving MMX register */
1879 {8, 8}, /* cost of loading MMX registers
1880 in SImode and DImode */
1881 {8, 8}, /* cost of storing MMX registers
1882 in SImode and DImode */
1883 2, /* cost of moving SSE register */
1884 {8, 8, 8}, /* cost of loading SSE registers
1885 in SImode, DImode and TImode */
1886 {8, 8, 8}, /* cost of storing SSE registers
1887 in SImode, DImode and TImode */
1888 5, /* MMX or SSE register to integer */
1889 32, /* size of l1 cache. */
1890 512, /* size of l2 cache. */
1891 64, /* size of prefetch block */
1892 6, /* number of parallel prefetches */
1893 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1894 value is increased to the perhaps more appropriate value of 5. */
1895 3, /* Branch cost */
1896 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1897 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1898 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1899 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1900 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1901 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1902 generic_memcpy,
1903 generic_memset,
1904 1, /* scalar_stmt_cost. */
1905 1, /* scalar_load_cost. */
1906 1, /* scalar_store_cost. */
1907 1, /* vec_stmt_cost. */
1908 1, /* vec_to_scalar_cost. */
1909 1, /* scalar_to_vec_cost. */
1910 1, /* vec_align_load_cost. */
1911 2, /* vec_unalign_load_cost. */
1912 1, /* vec_store_cost. */
1913 3, /* cond_taken_branch_cost. */
1914 1, /* cond_not_taken_branch_cost. */
1917 /* core_cost should produce code tuned for the Core family of CPUs. */
1918 static stringop_algs core_memcpy[2] = {
1919 {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
1920 {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
1921 {-1, libcall, false}}}};
1922 static stringop_algs core_memset[2] = {
1923 {libcall, {{6, loop_1_byte, true},
1924 {24, loop, true},
1925 {8192, rep_prefix_4_byte, true},
1926 {-1, libcall, false}}},
1927 {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
1928 {-1, libcall, false}}}};
1930 static const
1931 struct processor_costs core_cost = {
1932 COSTS_N_INSNS (1), /* cost of an add instruction */
1933 /* On all chips taken into consideration, lea is 2 cycles or more. With
1934 this cost, however, our current implementation of synth_mult results in
1935 the use of unnecessary temporary registers, causing regressions on several
1936 SPECfp benchmarks. */
1937 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1938 COSTS_N_INSNS (1), /* variable shift costs */
1939 COSTS_N_INSNS (1), /* constant shift costs */
1940 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1941 COSTS_N_INSNS (4), /* HI */
1942 COSTS_N_INSNS (3), /* SI */
1943 COSTS_N_INSNS (4), /* DI */
1944 COSTS_N_INSNS (2)}, /* other */
1945 0, /* cost of multiply per each bit set */
1946 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1947 COSTS_N_INSNS (26), /* HI */
1948 COSTS_N_INSNS (42), /* SI */
1949 COSTS_N_INSNS (74), /* DI */
1950 COSTS_N_INSNS (74)}, /* other */
1951 COSTS_N_INSNS (1), /* cost of movsx */
1952 COSTS_N_INSNS (1), /* cost of movzx */
1953 8, /* "large" insn */
1954 17, /* MOVE_RATIO */
1955 4, /* cost for loading QImode using movzbl */
1956 {4, 4, 4}, /* cost of loading integer registers
1957 in QImode, HImode and SImode.
1958 Relative to reg-reg move (2). */
1959 {4, 4, 4}, /* cost of storing integer registers */
1960 4, /* cost of reg,reg fld/fst */
1961 {12, 12, 12}, /* cost of loading fp registers
1962 in SFmode, DFmode and XFmode */
1963 {6, 6, 8}, /* cost of storing fp registers
1964 in SFmode, DFmode and XFmode */
1965 2, /* cost of moving MMX register */
1966 {8, 8}, /* cost of loading MMX registers
1967 in SImode and DImode */
1968 {8, 8}, /* cost of storing MMX registers
1969 in SImode and DImode */
1970 2, /* cost of moving SSE register */
1971 {8, 8, 8}, /* cost of loading SSE registers
1972 in SImode, DImode and TImode */
1973 {8, 8, 8}, /* cost of storing SSE registers
1974 in SImode, DImode and TImode */
1975 5, /* MMX or SSE register to integer */
1976 64, /* size of l1 cache. */
1977 512, /* size of l2 cache. */
1978 64, /* size of prefetch block */
1979 6, /* number of parallel prefetches */
1980 /* FIXME: perhaps a more appropriate value is 5. */
1981 3, /* Branch cost */
1982 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1983 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1984 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1985 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1986 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1987 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1988 core_memcpy,
1989 core_memset,
1990 1, /* scalar_stmt_cost. */
1991 1, /* scalar_load_cost. */
1992 1, /* scalar_store_cost. */
1993 1, /* vec_stmt_cost. */
1994 1, /* vec_to_scalar_cost. */
1995 1, /* scalar_to_vec_cost. */
1996 1, /* vec_align_load_cost. */
1997 2, /* vec_unalign_load_cost. */
1998 1, /* vec_store_cost. */
1999 3, /* cond_taken_branch_cost. */
2000 1, /* cond_not_taken_branch_cost. */
2004 /* Set by -mtune. */
2005 const struct processor_costs *ix86_tune_cost = &pentium_cost;
2007 /* Set by -mtune or -Os. */
2008 const struct processor_costs *ix86_cost = &pentium_cost;
2010 /* Processor feature/optimization bitmasks. */
2011 #define m_386 (1<<PROCESSOR_I386)
2012 #define m_486 (1<<PROCESSOR_I486)
2013 #define m_PENT (1<<PROCESSOR_PENTIUM)
2014 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
2015 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
2016 #define m_NOCONA (1<<PROCESSOR_NOCONA)
2017 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
2018 #define m_CORE2 (1<<PROCESSOR_CORE2)
2019 #define m_NEHALEM (1<<PROCESSOR_NEHALEM)
2020 #define m_SANDYBRIDGE (1<<PROCESSOR_SANDYBRIDGE)
2021 #define m_HASWELL (1<<PROCESSOR_HASWELL)
2022 #define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
2023 #define m_BONNELL (1<<PROCESSOR_BONNELL)
2024 #define m_SILVERMONT (1<<PROCESSOR_SILVERMONT)
2025 #define m_INTEL (1<<PROCESSOR_INTEL)
2027 #define m_GEODE (1<<PROCESSOR_GEODE)
2028 #define m_K6 (1<<PROCESSOR_K6)
2029 #define m_K6_GEODE (m_K6 | m_GEODE)
2030 #define m_K8 (1<<PROCESSOR_K8)
2031 #define m_ATHLON (1<<PROCESSOR_ATHLON)
2032 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
2033 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
2034 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
2035 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
2036 #define m_BDVER3 (1<<PROCESSOR_BDVER3)
2037 #define m_BDVER4 (1<<PROCESSOR_BDVER4)
2038 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
2039 #define m_BTVER2 (1<<PROCESSOR_BTVER2)
2040 #define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
2041 #define m_BTVER (m_BTVER1 | m_BTVER2)
2042 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)
2044 #define m_GENERIC (1<<PROCESSOR_GENERIC)
2046 const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
2047 #undef DEF_TUNE
2048 #define DEF_TUNE(tune, name, selector) name,
2049 #include "x86-tune.def"
2050 #undef DEF_TUNE
2053 /* Feature tests against the various tunings. */
2054 unsigned char ix86_tune_features[X86_TUNE_LAST];
2056 /* Feature tests against the various tunings used to create ix86_tune_features
2057 based on the processor mask. */
2058 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
2059 #undef DEF_TUNE
2060 #define DEF_TUNE(tune, name, selector) selector,
2061 #include "x86-tune.def"
2062 #undef DEF_TUNE
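/* Illustration of how the two DEF_TUNE tables above fit together: each
   DEF_TUNE (tune, name, selector) line in x86-tune.def contributes NAME to
   ix86_tune_feature_names and SELECTOR to initial_ix86_tune_features, so a
   hypothetical entry such as
     DEF_TUNE (X86_TUNE_USE_LEAVE, "use_leave", m_386 | m_CORE_ALL | m_GENERIC)
   yields the string "use_leave" and the mask (m_386 | m_CORE_ALL | m_GENERIC).
   set_ix86_tune_features below then turns the feature on exactly when
   (selector & (1u << ix86_tune)) is nonzero. */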
2065 /* Feature tests against the various architecture variations. */
2066 unsigned char ix86_arch_features[X86_ARCH_LAST];
2068 /* Feature tests against the various architecture variations, used to create
2069 ix86_arch_features based on the processor mask. */
2070 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2071 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
2072 ~(m_386 | m_486 | m_PENT | m_K6),
2074 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2075 ~m_386,
2077 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2078 ~(m_386 | m_486),
2080 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2081 ~m_386,
2083 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2084 ~m_386,
2087 /* In case the average insn count for a single function invocation is
2088 lower than this constant, emit fast (but longer) prologue and
2089 epilogue code. */
2090 #define FAST_PROLOGUE_INSN_COUNT 20
2092 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
2093 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2094 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2095 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2097 /* Array of the smallest class containing reg number REGNO, indexed by
2098 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2100 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2102 /* ax, dx, cx, bx */
2103 AREG, DREG, CREG, BREG,
2104 /* si, di, bp, sp */
2105 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2106 /* FP registers */
2107 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2108 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2109 /* arg pointer */
2110 NON_Q_REGS,
2111 /* flags, fpsr, fpcr, frame */
2112 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2113 /* SSE registers */
2114 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2115 SSE_REGS, SSE_REGS,
2116 /* MMX registers */
2117 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2118 MMX_REGS, MMX_REGS,
2119 /* REX registers */
2120 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2121 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2122 /* SSE REX registers */
2123 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2124 SSE_REGS, SSE_REGS,
2125 /* AVX-512 SSE registers */
2126 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2127 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2128 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2129 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2130 /* Mask registers. */
2131 MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2132 MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2133 /* MPX bound registers */
2134 BND_REGS, BND_REGS, BND_REGS, BND_REGS,
2137 /* The "default" register map used in 32bit mode. */
2139 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2141 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2142 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2143 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2144 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2145 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2146 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2147 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2148 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2149 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2150 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2151 101, 102, 103, 104, /* bound registers */
2154 /* The "default" register map used in 64bit mode. */
2156 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2158 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2159 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2160 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2161 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2162 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2163 8,9,10,11,12,13,14,15, /* extended integer registers */
2164 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2165 67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */
2166 75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */
2167 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
2168 126, 127, 128, 129, /* bound registers */
2171 /* Define the register numbers to be used in Dwarf debugging information.
2172 The SVR4 reference port C compiler uses the following register numbers
2173 in its Dwarf output code:
2174 0 for %eax (gcc regno = 0)
2175 1 for %ecx (gcc regno = 2)
2176 2 for %edx (gcc regno = 1)
2177 3 for %ebx (gcc regno = 3)
2178 4 for %esp (gcc regno = 7)
2179 5 for %ebp (gcc regno = 6)
2180 6 for %esi (gcc regno = 4)
2181 7 for %edi (gcc regno = 5)
2182 The following three DWARF register numbers are never generated by
2183 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2184 believes these numbers have these meanings.
2185 8 for %eip (no gcc equivalent)
2186 9 for %eflags (gcc regno = 17)
2187 10 for %trapno (no gcc equivalent)
2188 It is not at all clear how we should number the FP stack registers
2189 for the x86 architecture. If the version of SDB on x86/svr4 were
2190 a bit less brain dead with respect to floating-point then we would
2191 have a precedent to follow with respect to DWARF register numbers
2192 for x86 FP registers, but the SDB on x86/svr4 is so completely
2193 broken with respect to FP registers that it is hardly worth thinking
2194 of it as something to strive for compatibility with.
2195 The version of x86/svr4 SDB I have at the moment does (partially)
2196 seem to believe that DWARF register number 11 is associated with
2197 the x86 register %st(0), but that's about all. Higher DWARF
2198 register numbers don't seem to be associated with anything in
2199 particular, and even for DWARF regno 11, SDB only seems to under-
2200 stand that it should say that a variable lives in %st(0) (when
2201 asked via an `=' command) if we said it was in DWARF regno 11,
2202 but SDB still prints garbage when asked for the value of the
2203 variable in question (via a `/' command).
2204 (Also note that the labels SDB prints for various FP stack regs
2205 when doing an `x' command are all wrong.)
2206 Note that these problems generally don't affect the native SVR4
2207 C compiler because it doesn't allow the use of -O with -g and
2208 because when it is *not* optimizing, it allocates a memory
2209 location for each floating-point variable, and the memory
2210 location is what gets described in the DWARF AT_location
2211 attribute for the variable in question.
2212 Regardless of the severe mental illness of the x86/svr4 SDB, we
2213 do something sensible here and we use the following DWARF
2214 register numbers. Note that these are all stack-top-relative
2215 numbers.
2216 11 for %st(0) (gcc regno = 8)
2217 12 for %st(1) (gcc regno = 9)
2218 13 for %st(2) (gcc regno = 10)
2219 14 for %st(3) (gcc regno = 11)
2220 15 for %st(4) (gcc regno = 12)
2221 16 for %st(5) (gcc regno = 13)
2222 17 for %st(6) (gcc regno = 14)
2223 18 for %st(7) (gcc regno = 15)
2225 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2227 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2228 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2229 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2230 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2231 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2232 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2233 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2234 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2235 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2236 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2237 101, 102, 103, 104, /* bound registers */
2240 /* Define parameter passing and return registers. */
2242 static int const x86_64_int_parameter_registers[6] =
2244 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2247 static int const x86_64_ms_abi_int_parameter_registers[4] =
2249 CX_REG, DX_REG, R8_REG, R9_REG
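/* Worked example (illustrative): for a call f (a, b, c) with three integer
   arguments, the SysV table above passes a, b and c in %rdi, %rsi and %rdx,
   while the MS ABI table passes them in %rcx, %rdx and %r8; integer
   arguments beyond the listed registers are passed on the stack. */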
2252 static int const x86_64_int_return_registers[4] =
2254 AX_REG, DX_REG, DI_REG, SI_REG
2257 /* Additional registers that are clobbered by SYSV calls. */
2259 int const x86_64_ms_sysv_extra_clobbered_registers[12] =
2261 SI_REG, DI_REG,
2262 XMM6_REG, XMM7_REG,
2263 XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
2264 XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
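/* Illustrative note: these are registers the MS ABI treats as call-saved
   but the SysV ABI does not, so MS-ABI code calling a SysV-ABI function
   must assume %rsi, %rdi and %xmm6-%xmm15 are clobbered by the callee. */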
2267 /* Define the structure for the machine field in struct function. */
2269 struct GTY(()) stack_local_entry {
2270 unsigned short mode;
2271 unsigned short n;
2272 rtx rtl;
2273 struct stack_local_entry *next;
2276 /* Structure describing stack frame layout.
2277 Stack grows downward:
2279 [arguments]
2280 <- ARG_POINTER
2281 saved pc
2283 saved static chain if ix86_static_chain_on_stack
2285 saved frame pointer if frame_pointer_needed
2286 <- HARD_FRAME_POINTER
2287 [saved regs]
2288 <- regs_save_offset
2289 [padding0]
2291 [saved SSE regs]
2292 <- sse_regs_save_offset
2293 [padding1] |
2294 | <- FRAME_POINTER
2295 [va_arg registers] |
2297 [frame] |
2299 [padding2] | = to_allocate
2300 <- STACK_POINTER
2302 struct ix86_frame
2304 int nsseregs;
2305 int nregs;
2306 int va_arg_size;
2307 int red_zone_size;
2308 int outgoing_arguments_size;
2310 /* The offsets relative to ARG_POINTER. */
2311 HOST_WIDE_INT frame_pointer_offset;
2312 HOST_WIDE_INT hard_frame_pointer_offset;
2313 HOST_WIDE_INT stack_pointer_offset;
2314 HOST_WIDE_INT hfp_save_offset;
2315 HOST_WIDE_INT reg_save_offset;
2316 HOST_WIDE_INT sse_reg_save_offset;
2318 /* When save_regs_using_mov is set, emit prologue using
2319 move instead of push instructions. */
2320 bool save_regs_using_mov;
2323 /* Which CPU we are scheduling for. */
2324 enum attr_cpu ix86_schedule;
2326 /* Which CPU we are optimizing for. */
2327 enum processor_type ix86_tune;
2329 /* Which instruction set architecture to use. */
2330 enum processor_type ix86_arch;
2332 /* True if processor has SSE prefetch instruction. */
2333 unsigned char x86_prefetch_sse;
2335 /* -mstackrealign option */
2336 static const char ix86_force_align_arg_pointer_string[]
2337 = "force_align_arg_pointer";
2339 static rtx (*ix86_gen_leave) (void);
2340 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2341 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2342 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2343 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2344 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2345 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2346 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2347 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2348 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2349 static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
2350 static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
2352 /* Preferred alignment for stack boundary in bits. */
2353 unsigned int ix86_preferred_stack_boundary;
2355 /* Alignment for incoming stack boundary in bits, as specified on the
2356 command line. */
2357 static unsigned int ix86_user_incoming_stack_boundary;
2359 /* Default alignment for incoming stack boundary in bits. */
2360 static unsigned int ix86_default_incoming_stack_boundary;
2362 /* Alignment for incoming stack boundary in bits. */
2363 unsigned int ix86_incoming_stack_boundary;
2365 /* Calling abi specific va_list type nodes. */
2366 static GTY(()) tree sysv_va_list_type_node;
2367 static GTY(()) tree ms_va_list_type_node;
2369 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2370 char internal_label_prefix[16];
2371 int internal_label_prefix_len;
2373 /* Fence to use after loop using movnt. */
2374 tree x86_mfence;
2376 /* Register class used for passing a given 64bit part of the argument.
2377 These represent classes as documented by the PS ABI, with the exception
2378 of the SSESF and SSEDF classes, which are basically the SSE class; gcc just
2379 uses an SFmode or DFmode move instead of DImode to avoid reformatting penalties.
2381 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2382 whenever possible (the upper half then contains only padding). */
2383 enum x86_64_reg_class
2385 X86_64_NO_CLASS,
2386 X86_64_INTEGER_CLASS,
2387 X86_64_INTEGERSI_CLASS,
2388 X86_64_SSE_CLASS,
2389 X86_64_SSESF_CLASS,
2390 X86_64_SSEDF_CLASS,
2391 X86_64_SSEUP_CLASS,
2392 X86_64_X87_CLASS,
2393 X86_64_X87UP_CLASS,
2394 X86_64_COMPLEX_X87_CLASS,
2395 X86_64_MEMORY_CLASS
2398 #define MAX_CLASSES 8
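/* Illustrative example of the classification, assuming the usual psABI
   rules: a 16-byte struct { double d; int i; } is split into two 64-bit
   parts; the first is classified X86_64_SSEDF_CLASS (passed in an SSE
   register via a DFmode move) and the second X86_64_INTEGERSI_CLASS (a
   general register, moved in SImode because its upper half is padding). */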
2400 /* Table of constants used by fldpi, fldln2, etc.... */
2401 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2402 static bool ext_80387_constants_init = 0;
2405 static struct machine_function * ix86_init_machine_status (void);
2406 static rtx ix86_function_value (const_tree, const_tree, bool);
2407 static bool ix86_function_value_regno_p (const unsigned int);
2408 static unsigned int ix86_function_arg_boundary (enum machine_mode,
2409 const_tree);
2410 static rtx ix86_static_chain (const_tree, bool);
2411 static int ix86_function_regparm (const_tree, const_tree);
2412 static void ix86_compute_frame_layout (struct ix86_frame *);
2413 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
2414 rtx, rtx, int);
2415 static void ix86_add_new_builtins (HOST_WIDE_INT);
2416 static tree ix86_canonical_va_list_type (tree);
2417 static void predict_jump (int);
2418 static unsigned int split_stack_prologue_scratch_regno (void);
2419 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2421 enum ix86_function_specific_strings
2423 IX86_FUNCTION_SPECIFIC_ARCH,
2424 IX86_FUNCTION_SPECIFIC_TUNE,
2425 IX86_FUNCTION_SPECIFIC_MAX
2428 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2429 const char *, enum fpmath_unit, bool);
2430 static void ix86_function_specific_save (struct cl_target_option *,
2431 struct gcc_options *opts);
2432 static void ix86_function_specific_restore (struct gcc_options *opts,
2433 struct cl_target_option *);
2434 static void ix86_function_specific_print (FILE *, int,
2435 struct cl_target_option *);
2436 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2437 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2438 struct gcc_options *,
2439 struct gcc_options *,
2440 struct gcc_options *);
2441 static bool ix86_can_inline_p (tree, tree);
2442 static void ix86_set_current_function (tree);
2443 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2445 static enum calling_abi ix86_function_abi (const_tree);
2448 #ifndef SUBTARGET32_DEFAULT_CPU
2449 #define SUBTARGET32_DEFAULT_CPU "i386"
2450 #endif
2452 /* Whether -mtune= or -march= were specified */
2453 static int ix86_tune_defaulted;
2454 static int ix86_arch_specified;
2456 /* Vectorization library interface and handlers. */
2457 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2459 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2460 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2462 /* Processor target table, indexed by processor number */
2463 struct ptt
2465 const char *const name; /* processor name */
2466 const struct processor_costs *cost; /* Processor costs */
2467 const int align_loop; /* Default alignments. */
2468 const int align_loop_max_skip;
2469 const int align_jump;
2470 const int align_jump_max_skip;
2471 const int align_func;
2474 /* This table must be in sync with enum processor_type in i386.h. */
2475 static const struct ptt processor_target_table[PROCESSOR_max] =
2477 {"generic", &generic_cost, 16, 10, 16, 10, 16},
2478 {"i386", &i386_cost, 4, 3, 4, 3, 4},
2479 {"i486", &i486_cost, 16, 15, 16, 15, 16},
2480 {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
2481 {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
2482 {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
2483 {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
2484 {"core2", &core_cost, 16, 10, 16, 10, 16},
2485 {"nehalem", &core_cost, 16, 10, 16, 10, 16},
2486 {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
2487 {"haswell", &core_cost, 16, 10, 16, 10, 16},
2488 {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
2489 {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
2490 {"intel", &intel_cost, 16, 15, 16, 7, 16},
2491 {"geode", &geode_cost, 0, 0, 0, 0, 0},
2492 {"k6", &k6_cost, 32, 7, 32, 7, 32},
2493 {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
2494 {"k8", &k8_cost, 16, 7, 16, 7, 16},
2495 {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
2496 {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
2497 {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
2498 {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
2499 {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
2500 {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
2501 {"btver2", &btver2_cost, 16, 10, 16, 7, 11}
2504 static bool
2505 gate_insert_vzeroupper (void)
2507 return TARGET_AVX && !TARGET_AVX512F && TARGET_VZEROUPPER;
2510 static unsigned int
2511 rest_of_handle_insert_vzeroupper (void)
2513 int i;
2515 /* vzeroupper instructions are inserted immediately after reload to
2516 account for possible spills from 256bit registers. The pass
2517 reuses the mode switching infrastructure by re-running the mode
2518 insertion pass, so disable entities that have already been processed. */
2519 for (i = 0; i < MAX_386_ENTITIES; i++)
2520 ix86_optimize_mode_switching[i] = 0;
2522 ix86_optimize_mode_switching[AVX_U128] = 1;
2524 /* Call optimize_mode_switching. */
2525 g->get_passes ()->execute_pass_mode_switching ();
2526 return 0;
2529 namespace {
2531 const pass_data pass_data_insert_vzeroupper =
2533 RTL_PASS, /* type */
2534 "vzeroupper", /* name */
2535 OPTGROUP_NONE, /* optinfo_flags */
2536 true, /* has_gate */
2537 true, /* has_execute */
2538 TV_NONE, /* tv_id */
2539 0, /* properties_required */
2540 0, /* properties_provided */
2541 0, /* properties_destroyed */
2542 0, /* todo_flags_start */
2543 ( TODO_df_finish | TODO_verify_rtl_sharing | 0 ), /* todo_flags_finish */
2546 class pass_insert_vzeroupper : public rtl_opt_pass
2548 public:
2549 pass_insert_vzeroupper(gcc::context *ctxt)
2550 : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
2553 /* opt_pass methods: */
2554 bool gate () { return gate_insert_vzeroupper (); }
2555 unsigned int execute () { return rest_of_handle_insert_vzeroupper (); }
2557 }; // class pass_insert_vzeroupper
2559 } // anon namespace
2561 rtl_opt_pass *
2562 make_pass_insert_vzeroupper (gcc::context *ctxt)
2564 return new pass_insert_vzeroupper (ctxt);
2567 /* Return true if a red-zone is in use. */
2569 static inline bool
2570 ix86_using_red_zone (void)
2572 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2575 /* Return a string that documents the current -m options. The caller is
2576 responsible for freeing the string. */
2578 static char *
2579 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
2580 const char *tune, enum fpmath_unit fpmath,
2581 bool add_nl_p)
2583 struct ix86_target_opts
2585 const char *option; /* option string */
2586 HOST_WIDE_INT mask; /* isa mask options */
2589 /* This table is ordered so that options like -msse4.2 that imply
2590 preceding options will match those first. */
2591 static struct ix86_target_opts isa_opts[] =
2593 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2594 { "-mfma", OPTION_MASK_ISA_FMA },
2595 { "-mxop", OPTION_MASK_ISA_XOP },
2596 { "-mlwp", OPTION_MASK_ISA_LWP },
2597 { "-mavx512f", OPTION_MASK_ISA_AVX512F },
2598 { "-mavx512er", OPTION_MASK_ISA_AVX512ER },
2599 { "-mavx512cd", OPTION_MASK_ISA_AVX512CD },
2600 { "-mavx512pf", OPTION_MASK_ISA_AVX512PF },
2601 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2602 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2603 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2604 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2605 { "-msse3", OPTION_MASK_ISA_SSE3 },
2606 { "-msse2", OPTION_MASK_ISA_SSE2 },
2607 { "-msse", OPTION_MASK_ISA_SSE },
2608 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2609 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2610 { "-mmmx", OPTION_MASK_ISA_MMX },
2611 { "-mabm", OPTION_MASK_ISA_ABM },
2612 { "-mbmi", OPTION_MASK_ISA_BMI },
2613 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2614 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2615 { "-mhle", OPTION_MASK_ISA_HLE },
2616 { "-mfxsr", OPTION_MASK_ISA_FXSR },
2617 { "-mrdseed", OPTION_MASK_ISA_RDSEED },
2618 { "-mprfchw", OPTION_MASK_ISA_PRFCHW },
2619 { "-madx", OPTION_MASK_ISA_ADX },
2620 { "-mtbm", OPTION_MASK_ISA_TBM },
2621 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2622 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2623 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2624 { "-maes", OPTION_MASK_ISA_AES },
2625 { "-msha", OPTION_MASK_ISA_SHA },
2626 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2627 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2628 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2629 { "-mf16c", OPTION_MASK_ISA_F16C },
2630 { "-mrtm", OPTION_MASK_ISA_RTM },
2631 { "-mxsave", OPTION_MASK_ISA_XSAVE },
2632 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT },
2633 { "-mprefetchwt1", OPTION_MASK_ISA_PREFETCHWT1 },
2634 { "-mmpx", OPTION_MASK_ISA_MPX },
2637 /* Flag options. */
2638 static struct ix86_target_opts flag_opts[] =
2640 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2641 { "-mlong-double-128", MASK_LONG_DOUBLE_128 },
2642 { "-mlong-double-64", MASK_LONG_DOUBLE_64 },
2643 { "-m80387", MASK_80387 },
2644 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2645 { "-malign-double", MASK_ALIGN_DOUBLE },
2646 { "-mcld", MASK_CLD },
2647 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2648 { "-mieee-fp", MASK_IEEE_FP },
2649 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2650 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2651 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2652 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2653 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2654 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2655 { "-mno-red-zone", MASK_NO_RED_ZONE },
2656 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2657 { "-mrecip", MASK_RECIP },
2658 { "-mrtd", MASK_RTD },
2659 { "-msseregparm", MASK_SSEREGPARM },
2660 { "-mstack-arg-probe", MASK_STACK_PROBE },
2661 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2662 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2663 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2664 { "-mvzeroupper", MASK_VZEROUPPER },
2665 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2666 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2667 { "-mprefer-avx128", MASK_PREFER_AVX128},
2670 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2672 char isa_other[40];
2673 char target_other[40];
2674 unsigned num = 0;
2675 unsigned i, j;
2676 char *ret;
2677 char *ptr;
2678 size_t len;
2679 size_t line_len;
2680 size_t sep_len;
2681 const char *abi;
2683 memset (opts, '\0', sizeof (opts));
2685 /* Add -march= option. */
2686 if (arch)
2688 opts[num][0] = "-march=";
2689 opts[num++][1] = arch;
2692 /* Add -mtune= option. */
2693 if (tune)
2695 opts[num][0] = "-mtune=";
2696 opts[num++][1] = tune;
2699 /* Add -m32/-m64/-mx32. */
2700 if ((isa & OPTION_MASK_ISA_64BIT) != 0)
2702 if ((isa & OPTION_MASK_ABI_64) != 0)
2703 abi = "-m64";
2704 else
2705 abi = "-mx32";
2706 isa &= ~ (OPTION_MASK_ISA_64BIT
2707 | OPTION_MASK_ABI_64
2708 | OPTION_MASK_ABI_X32);
2710 else
2711 abi = "-m32";
2712 opts[num++][0] = abi;
2714 /* Pick out the options in isa options. */
2715 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2717 if ((isa & isa_opts[i].mask) != 0)
2719 opts[num++][0] = isa_opts[i].option;
2720 isa &= ~ isa_opts[i].mask;
2724 if (isa && add_nl_p)
2726 opts[num++][0] = isa_other;
2727 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
2728 isa);
2731 /* Add flag options. */
2732 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2734 if ((flags & flag_opts[i].mask) != 0)
2736 opts[num++][0] = flag_opts[i].option;
2737 flags &= ~ flag_opts[i].mask;
2741 if (flags && add_nl_p)
2743 opts[num++][0] = target_other;
2744 sprintf (target_other, "(other flags: %#x)", flags);
2747 /* Add -fpmath= option. */
2748 if (fpmath)
2750 opts[num][0] = "-mfpmath=";
2751 switch ((int) fpmath)
2753 case FPMATH_387:
2754 opts[num++][1] = "387";
2755 break;
2757 case FPMATH_SSE:
2758 opts[num++][1] = "sse";
2759 break;
2761 case FPMATH_387 | FPMATH_SSE:
2762 opts[num++][1] = "sse+387";
2763 break;
2765 default:
2766 gcc_unreachable ();
2770 /* Any options? */
2771 if (num == 0)
2772 return NULL;
2774 gcc_assert (num < ARRAY_SIZE (opts));
2776 /* Size the string. */
2777 len = 0;
2778 sep_len = (add_nl_p) ? 3 : 1;
2779 for (i = 0; i < num; i++)
2781 len += sep_len;
2782 for (j = 0; j < 2; j++)
2783 if (opts[i][j])
2784 len += strlen (opts[i][j]);
2787 /* Build the string. */
2788 ret = ptr = (char *) xmalloc (len);
2789 line_len = 0;
2791 for (i = 0; i < num; i++)
2793 size_t len2[2];
2795 for (j = 0; j < 2; j++)
2796 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2798 if (i != 0)
2800 *ptr++ = ' ';
2801 line_len++;
2803 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2805 *ptr++ = '\\';
2806 *ptr++ = '\n';
2807 line_len = 0;
2811 for (j = 0; j < 2; j++)
2812 if (opts[i][j])
2814 memcpy (ptr, opts[i][j], len2[j]);
2815 ptr += len2[j];
2816 line_len += len2[j];
2820 *ptr = '\0';
2821 gcc_assert (ret + len >= ptr);
2823 return ret;
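/* Illustrative use (exact output depends on the enabled ISA and flag bits):
   ix86_target_string (OPTION_MASK_ISA_64BIT | OPTION_MASK_ABI_64
                       | OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE
                       | OPTION_MASK_ISA_MMX,
                       MASK_80387, "x86-64", "generic", FPMATH_SSE, false)
   would return a malloc'd string along the lines of
   "-march=x86-64 -mtune=generic -m64 -msse2 -msse -mmmx -m80387 -mfpmath=sse". */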
2826 /* Return true if profiling code should be emitted before the
2827 prologue; otherwise return false.
2828 Note: for x86 with "hotfix" it is sorried (a sorry diagnostic is issued). */
2829 static bool
2830 ix86_profile_before_prologue (void)
2832 return flag_fentry != 0;
2835 /* Function that is callable from the debugger to print the current
2836 options. */
2837 void ATTRIBUTE_UNUSED
2838 ix86_debug_options (void)
2840 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2841 ix86_arch_string, ix86_tune_string,
2842 ix86_fpmath, true);
2844 if (opts)
2846 fprintf (stderr, "%s\n\n", opts);
2847 free (opts);
2849 else
2850 fputs ("<no options>\n\n", stderr);
2852 return;
2855 static const char *stringop_alg_names[] = {
2856 #define DEF_ENUM
2857 #define DEF_ALG(alg, name) #name,
2858 #include "stringop.def"
2859 #undef DEF_ENUM
2860 #undef DEF_ALG
2863 /* Parse parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
2864 The string is of the following form (or a comma-separated list of such entries):
2866 strategy_alg:max_size:[align|noalign]
2868 where the full size range for the strategy is either [0, max_size] or
2869 [min_size, max_size], in which min_size is the max_size + 1 of the
2870 preceding range. The last size range must have max_size == -1.
2872 Examples:
2875 -mmemcpy-strategy=libcall:-1:noalign
2877 this is equivalent to (for known size memcpy) -mstringop-strategy=libcall
2881 -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
2883 This tells the compiler to use the following strategy for memset:
2884 1) when the expected size is in [1, 16], use the rep_8byte strategy;
2885 2) when the size is in [17, 2048], use vector_loop;
2886 3) when the size is > 2048, use libcall. */
2888 struct stringop_size_range
2890 int max;
2891 stringop_alg alg;
2892 bool noalign;
2895 static void
2896 ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
2898 const struct stringop_algs *default_algs;
2899 stringop_size_range input_ranges[MAX_STRINGOP_ALGS];
2900 char *curr_range_str, *next_range_str;
2901 int i = 0, n = 0;
2903 if (is_memset)
2904 default_algs = &ix86_cost->memset[TARGET_64BIT != 0];
2905 else
2906 default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
2908 curr_range_str = strategy_str;
2912 int maxs;
2913 char alg_name[128];
2914 char align[16];
2915 next_range_str = strchr (curr_range_str, ',');
2916 if (next_range_str)
2917 *next_range_str++ = '\0';
2919 if (3 != sscanf (curr_range_str, "%20[^:]:%d:%10s",
2920 alg_name, &maxs, align))
2922 error ("wrong arg %s to option %s", curr_range_str,
2923 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2924 return;
2927 if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1))
2929 error ("size ranges of option %s should be increasing",
2930 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2931 return;
2934 for (i = 0; i < last_alg; i++)
2935 if (!strcmp (alg_name, stringop_alg_names[i]))
2936 break;
2938 if (i == last_alg)
2940 error ("wrong stringop strategy name %s specified for option %s",
2941 alg_name,
2942 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2943 return;
2946 input_ranges[n].max = maxs;
2947 input_ranges[n].alg = (stringop_alg) i;
2948 if (!strcmp (align, "align"))
2949 input_ranges[n].noalign = false;
2950 else if (!strcmp (align, "noalign"))
2951 input_ranges[n].noalign = true;
2952 else
2954 error ("unknown alignment %s specified for option %s",
2955 align, is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2956 return;
2958 n++;
2959 curr_range_str = next_range_str;
2961 while (curr_range_str);
2963 if (input_ranges[n - 1].max != -1)
2965 error ("the max value for the last size range should be -1"
2966 " for option %s",
2967 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2968 return;
2971 if (n > MAX_STRINGOP_ALGS)
2973 error ("too many size ranges specified in option %s",
2974 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2975 return;
2978 /* Now override the default algs array. */
2979 for (i = 0; i < n; i++)
2981 *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max;
2982 *const_cast<stringop_alg *>(&default_algs->size[i].alg)
2983 = input_ranges[i].alg;
2984 *const_cast<int *>(&default_algs->size[i].noalign)
2985 = input_ranges[i].noalign;
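/* For example (illustrative), -mmemcpy-strategy=unrolled_loop:256:noalign,libcall:-1:noalign
   parsed by this routine rewrites the active memcpy table so that size[0]
   becomes {256, unrolled_loop, true} and size[1] becomes {-1, libcall, true}
   (the final true being the noalign flag): an unrolled loop for known sizes
   up to 256 bytes and a library call beyond that. */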
2990 /* Parse the -mtune-ctrl= option. When DUMP is true,
2991 print the features that are explicitly set. */
2993 static void
2994 parse_mtune_ctrl_str (bool dump)
2996 if (!ix86_tune_ctrl_string)
2997 return;
2999 char *next_feature_string = NULL;
3000 char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
3001 char *orig = curr_feature_string;
3002 int i;
3005 bool clear = false;
3007 next_feature_string = strchr (curr_feature_string, ',');
3008 if (next_feature_string)
3009 *next_feature_string++ = '\0';
3010 if (*curr_feature_string == '^')
3012 curr_feature_string++;
3013 clear = true;
3015 for (i = 0; i < X86_TUNE_LAST; i++)
3017 if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
3019 ix86_tune_features[i] = !clear;
3020 if (dump)
3021 fprintf (stderr, "Explicitly %s feature %s\n",
3022 clear ? "clear" : "set", ix86_tune_feature_names[i]);
3023 break;
3026 if (i == X86_TUNE_LAST)
3027 error ("Unknown parameter to option -mtune-ctrl: %s",
3028 clear ? curr_feature_string - 1 : curr_feature_string);
3029 curr_feature_string = next_feature_string;
3031 while (curr_feature_string);
3032 free (orig);
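   /* Example (illustrative; the feature names must appear in
      ix86_tune_feature_names[]):
        -mtune-ctrl=^use_incdec,pad_returns
      clears the tuning feature "use_incdec" (the leading '^' inverts the
      setting) and sets "pad_returns", assuming both names exist in the
      table.  */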
3035 /* Helper function to set ix86_tune_features. IX86_TUNE is the
3036 processor type. */
3038 static void
3039 set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
3041 unsigned int ix86_tune_mask = 1u << ix86_tune;
3042 int i;
3044 for (i = 0; i < X86_TUNE_LAST; ++i)
3046 if (ix86_tune_no_default)
3047 ix86_tune_features[i] = 0;
3048 else
3049 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3052 if (dump)
3054 fprintf (stderr, "List of x86 specific tuning parameter names:\n");
3055 for (i = 0; i < X86_TUNE_LAST; i++)
3056 fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
3057 ix86_tune_features[i] ? "on" : "off");
3060 parse_mtune_ctrl_str (dump);
3064 /* Override various settings based on options. If MAIN_ARGS_P, the
3065 options are from the command line, otherwise they are from
3066 attributes. */
3068 static void
3069 ix86_option_override_internal (bool main_args_p,
3070 struct gcc_options *opts,
3071 struct gcc_options *opts_set)
3073 int i;
3074 unsigned int ix86_arch_mask;
3075 const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
3076 const char *prefix;
3077 const char *suffix;
3078 const char *sw;
3080 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
3081 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
3082 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
3083 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
3084 #define PTA_AES (HOST_WIDE_INT_1 << 4)
3085 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
3086 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
3087 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
3088 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
3089 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
3090 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
3091 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
3092 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
3093 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
3094 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
3095 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
3096 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
3097 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
3098 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
3099 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
3100 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
3101 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
3102 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
3103 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
3104 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
3105 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
3106 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
3107 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
3108 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
3109 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
3110 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
3111 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
3112 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
3113 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
3114 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
3115 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
3116 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
3117 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
3118 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
3119 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
3120 #define PTA_AVX512F (HOST_WIDE_INT_1 << 40)
3121 #define PTA_AVX512ER (HOST_WIDE_INT_1 << 41)
3122 #define PTA_AVX512PF (HOST_WIDE_INT_1 << 42)
3123 #define PTA_AVX512CD (HOST_WIDE_INT_1 << 43)
3124 #define PTA_MPX (HOST_WIDE_INT_1 << 44)
3125 #define PTA_SHA (HOST_WIDE_INT_1 << 45)
3126 #define PTA_PREFETCHWT1 (HOST_WIDE_INT_1 << 46)
3128 #define PTA_CORE2 \
3129 (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
3130 | PTA_CX16 | PTA_FXSR)
3131 #define PTA_NEHALEM \
3132 (PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT)
3133 #define PTA_WESTMERE \
3134 (PTA_NEHALEM | PTA_AES | PTA_PCLMUL)
3135 #define PTA_SANDYBRIDGE \
3136 (PTA_WESTMERE | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT)
3137 #define PTA_IVYBRIDGE \
3138 (PTA_SANDYBRIDGE | PTA_FSGSBASE | PTA_RDRND | PTA_F16C)
3139 #define PTA_HASWELL \
3140 (PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT \
3141 | PTA_FMA | PTA_MOVBE | PTA_RTM | PTA_HLE)
3142 #define PTA_BROADWELL \
3143 (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED)
3144 #define PTA_BONNELL \
3145 (PTA_CORE2 | PTA_MOVBE)
3146 #define PTA_SILVERMONT \
3147 (PTA_WESTMERE | PTA_MOVBE)
3149 /* If this reaches 64, the flags field of struct pta below needs to be widened.  */
3151 static struct pta
3153 const char *const name; /* processor name or nickname. */
3154 const enum processor_type processor;
3155 const enum attr_cpu schedule;
3156 const unsigned HOST_WIDE_INT flags;
3158 const processor_alias_table[] =
3160 {"i386", PROCESSOR_I386, CPU_NONE, 0},
3161 {"i486", PROCESSOR_I486, CPU_NONE, 0},
3162 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3163 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3164 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3165 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3166 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3167 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3168 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3169 PTA_MMX | PTA_SSE | PTA_FXSR},
3170 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3171 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3172 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
3173 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3174 PTA_MMX | PTA_SSE | PTA_FXSR},
3175 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3176 PTA_MMX | PTA_SSE | PTA_FXSR},
3177 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3178 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3179 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3180 PTA_MMX |PTA_SSE | PTA_SSE2 | PTA_FXSR},
3181 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3182 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3183 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3184 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3185 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3186 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3187 | PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
3188 {"core2", PROCESSOR_CORE2, CPU_CORE2, PTA_CORE2},
3189 {"nehalem", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3190 {"corei7", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3191 {"westmere", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_WESTMERE},
3192 {"sandybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3193 PTA_SANDYBRIDGE},
3194 {"corei7-avx", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3195 PTA_SANDYBRIDGE},
3196 {"ivybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3197 PTA_IVYBRIDGE},
3198 {"core-avx-i", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3199 PTA_IVYBRIDGE},
3200 {"haswell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3201 {"core-avx2", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3202 {"broadwell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_BROADWELL},
3203 {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3204 {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3205 {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3206 {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3207 {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
3208 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3209 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3210 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3211 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3212 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3213 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3214 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3215 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3216 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3217 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3218 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3219 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3220 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3221 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3222 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3223 {"x86-64", PROCESSOR_K8, CPU_K8,
3224 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3225 {"k8", PROCESSOR_K8, CPU_K8,
3226 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3227 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3228 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3229 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3230 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3231 {"opteron", PROCESSOR_K8, CPU_K8,
3232 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3233 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3234 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3235 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3236 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3237 {"athlon64", PROCESSOR_K8, CPU_K8,
3238 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3239 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3240 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3241 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3242 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3243 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3244 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3245 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3246 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3247 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3248 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3249 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3250 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3251 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3252 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3253 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3254 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3255 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3256 | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3257 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3258 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3259 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3260 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3261 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3262 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3263 {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
3264 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3265 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3266 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3267 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3268 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
3269 | PTA_XSAVEOPT | PTA_FSGSBASE},
3270 {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
3271 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3272 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3273 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
3274 | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
3275 | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
3276 | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE},
3277 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
3278 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3279 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_PRFCHW
3280 | PTA_FXSR | PTA_XSAVE},
3281 {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
3282 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3283 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_SSE4_1
3284 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3285 | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
3286 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
3288 {"generic", PROCESSOR_GENERIC, CPU_GENERIC,
3289 PTA_64BIT
3290 | PTA_HLE /* flags are only used for -march switch. */ },
3293 /* -mrecip options. */
3294 static struct
3296 const char *string; /* option name */
3297 unsigned int mask; /* mask bits to set */
3299 const recip_options[] =
3301 { "all", RECIP_MASK_ALL },
3302 { "none", RECIP_MASK_NONE },
3303 { "div", RECIP_MASK_DIV },
3304 { "sqrt", RECIP_MASK_SQRT },
3305 { "vec-div", RECIP_MASK_VEC_DIV },
3306 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
3309 int const pta_size = ARRAY_SIZE (processor_alias_table);
3311 /* Set up prefix/suffix so the error messages refer to either the command
3312 line argument, or the attribute(target). */
3313 if (main_args_p)
3315 prefix = "-m";
3316 suffix = "";
3317 sw = "switch";
3319 else
3321 prefix = "option(\"";
3322 suffix = "\")";
3323 sw = "attribute";
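   /* For example, the "bad value" diagnostic below is rendered as
        bad value (foo) for -march= switch
      for a command-line option, and as
        bad value (foo) for option("arch=") attribute
      when the string comes from attribute((target)).  */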
3326 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3327 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3328 if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3329 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
3330 #ifdef TARGET_BI_ARCH
3331 else
3333 #if TARGET_BI_ARCH == 1
3334 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3335 is on and OPTION_MASK_ABI_X32 is off. We turn off
3336 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3337 -mx32. */
3338 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3339 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3340 #else
3341 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3342 on and OPTION_MASK_ABI_64 is off. We turn off
3343 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3344 -m64. */
3345 if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3346 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3347 #endif
3349 #endif
3351 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3353 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3354 OPTION_MASK_ABI_64 for TARGET_X32. */
3355 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3356 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3358 else if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
3359 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT
3360 | OPTION_MASK_ABI_X32
3361 | OPTION_MASK_ABI_64);
3362 else if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3364 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3365 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3366 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3367 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3370 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3371 SUBTARGET_OVERRIDE_OPTIONS;
3372 #endif
3374 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3375 SUBSUBTARGET_OVERRIDE_OPTIONS;
3376 #endif
3378 /* -fPIC is the default for x86_64. */
3379 if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags))
3380 opts->x_flag_pic = 2;
3382 /* Need to check -mtune=generic first. */
3383 if (opts->x_ix86_tune_string)
3385 /* As special support for cross compilers we read -mtune=native
3386 as -mtune=generic. With native compilers we won't see the
3387 -mtune=native, as it was changed by the driver. */
3388 if (!strcmp (opts->x_ix86_tune_string, "native"))
3390 opts->x_ix86_tune_string = "generic";
3392 else if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3393 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3394 "%stune=k8%s or %stune=generic%s instead as appropriate",
3395 prefix, suffix, prefix, suffix, prefix, suffix);
3397 else
3399 if (opts->x_ix86_arch_string)
3400 opts->x_ix86_tune_string = opts->x_ix86_arch_string;
3401 if (!opts->x_ix86_tune_string)
3403 opts->x_ix86_tune_string
3404 = processor_target_table[TARGET_CPU_DEFAULT].name;
3405 ix86_tune_defaulted = 1;
3408 /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
3409 or defaulted. We need to use a sensible tune option. */
3410 if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3412 opts->x_ix86_tune_string = "generic";
3416 if (opts->x_ix86_stringop_alg == rep_prefix_8_byte
3417 && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3419 /* rep; movq isn't available in 32-bit code. */
3420 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3421 opts->x_ix86_stringop_alg = no_stringop;
3424 if (!opts->x_ix86_arch_string)
3425 opts->x_ix86_arch_string
3426 = TARGET_64BIT_P (opts->x_ix86_isa_flags)
3427 ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3428 else
3429 ix86_arch_specified = 1;
3431 if (opts_set->x_ix86_pmode)
3433 if ((TARGET_LP64_P (opts->x_ix86_isa_flags)
3434 && opts->x_ix86_pmode == PMODE_SI)
3435 || (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
3436 && opts->x_ix86_pmode == PMODE_DI))
3437 error ("address mode %qs not supported in the %s bit mode",
3438 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long",
3439 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32");
3441 else
3442 opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags)
3443 ? PMODE_DI : PMODE_SI;
3445 if (!opts_set->x_ix86_abi)
3446 opts->x_ix86_abi = DEFAULT_ABI;
3448 /* For targets using the MS ABI, enable ms-extensions if not
3449 explicitly turned off.  For non-MS ABI targets, we turn off
3450 this option.  */
3451 if (!opts_set->x_flag_ms_extensions)
3452 opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI);
3454 if (opts_set->x_ix86_cmodel)
3456 switch (opts->x_ix86_cmodel)
3458 case CM_SMALL:
3459 case CM_SMALL_PIC:
3460 if (opts->x_flag_pic)
3461 opts->x_ix86_cmodel = CM_SMALL_PIC;
3462 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3463 error ("code model %qs not supported in the %s bit mode",
3464 "small", "32");
3465 break;
3467 case CM_MEDIUM:
3468 case CM_MEDIUM_PIC:
3469 if (opts->x_flag_pic)
3470 opts->x_ix86_cmodel = CM_MEDIUM_PIC;
3471 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3472 error ("code model %qs not supported in the %s bit mode",
3473 "medium", "32");
3474 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3475 error ("code model %qs not supported in x32 mode",
3476 "medium");
3477 break;
3479 case CM_LARGE:
3480 case CM_LARGE_PIC:
3481 if (opts->x_flag_pic)
3482 opts->x_ix86_cmodel = CM_LARGE_PIC;
3483 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3484 error ("code model %qs not supported in the %s bit mode",
3485 "large", "32");
3486 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3487 error ("code model %qs not supported in x32 mode",
3488 "large");
3489 break;
3491 case CM_32:
3492 if (opts->x_flag_pic)
3493 error ("code model %s does not support PIC mode", "32");
3494 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3495 error ("code model %qs not supported in the %s bit mode",
3496 "32", "64");
3497 break;
3499 case CM_KERNEL:
3500 if (opts->x_flag_pic)
3502 error ("code model %s does not support PIC mode", "kernel");
3503 opts->x_ix86_cmodel = CM_32;
3505 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3506 error ("code model %qs not supported in the %s bit mode",
3507 "kernel", "32");
3508 break;
3510 default:
3511 gcc_unreachable ();
3514 else
3516 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3517 use of rip-relative addressing. This eliminates fixups that
3518 would otherwise be needed if this object is to be placed in a
3519 DLL, and is essentially just as efficient as direct addressing. */
3520 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3521 && (TARGET_RDOS || TARGET_PECOFF))
3522 opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1;
3523 else if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3524 opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL;
3525 else
3526 opts->x_ix86_cmodel = CM_32;
3528 if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL)
3530 error ("-masm=intel not supported in this configuration");
3531 opts->x_ix86_asm_dialect = ASM_ATT;
3533 if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0)
3534 != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3535 sorry ("%i-bit mode not compiled in",
3536 (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3538 for (i = 0; i < pta_size; i++)
3539 if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name))
3541 ix86_schedule = processor_alias_table[i].schedule;
3542 ix86_arch = processor_alias_table[i].processor;
3543 /* Default cpu tuning to the architecture. */
3544 ix86_tune = ix86_arch;
3546 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3547 && !(processor_alias_table[i].flags & PTA_64BIT))
3548 error ("CPU you selected does not support x86-64 "
3549 "instruction set");
3551 if (processor_alias_table[i].flags & PTA_MMX
3552 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3553 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3554 if (processor_alias_table[i].flags & PTA_3DNOW
3555 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3556 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3557 if (processor_alias_table[i].flags & PTA_3DNOW_A
3558 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3559 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3560 if (processor_alias_table[i].flags & PTA_SSE
3561 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3562 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3563 if (processor_alias_table[i].flags & PTA_SSE2
3564 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3565 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3566 if (processor_alias_table[i].flags & PTA_SSE3
3567 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3568 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3569 if (processor_alias_table[i].flags & PTA_SSSE3
3570 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3571 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3572 if (processor_alias_table[i].flags & PTA_SSE4_1
3573 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3574 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3575 if (processor_alias_table[i].flags & PTA_SSE4_2
3576 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3577 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3578 if (processor_alias_table[i].flags & PTA_AVX
3579 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3580 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3581 if (processor_alias_table[i].flags & PTA_AVX2
3582 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3583 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3584 if (processor_alias_table[i].flags & PTA_FMA
3585 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3586 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3587 if (processor_alias_table[i].flags & PTA_SSE4A
3588 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3589 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3590 if (processor_alias_table[i].flags & PTA_FMA4
3591 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3592 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3593 if (processor_alias_table[i].flags & PTA_XOP
3594 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3595 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3596 if (processor_alias_table[i].flags & PTA_LWP
3597 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3598 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3599 if (processor_alias_table[i].flags & PTA_ABM
3600 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3601 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3602 if (processor_alias_table[i].flags & PTA_BMI
3603 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3604 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3605 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3606 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3607 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3608 if (processor_alias_table[i].flags & PTA_TBM
3609 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3610 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3611 if (processor_alias_table[i].flags & PTA_BMI2
3612 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
3613 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
3614 if (processor_alias_table[i].flags & PTA_CX16
3615 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3616 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3617 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3618 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3619 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3620 if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags)
3621 && (processor_alias_table[i].flags & PTA_NO_SAHF))
3622 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3623 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3624 if (processor_alias_table[i].flags & PTA_MOVBE
3625 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3626 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3627 if (processor_alias_table[i].flags & PTA_AES
3628 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3629 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AES;
3630 if (processor_alias_table[i].flags & PTA_SHA
3631 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
3632 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SHA;
3633 if (processor_alias_table[i].flags & PTA_PCLMUL
3634 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3635 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3636 if (processor_alias_table[i].flags & PTA_FSGSBASE
3637 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3638 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3639 if (processor_alias_table[i].flags & PTA_RDRND
3640 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3641 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3642 if (processor_alias_table[i].flags & PTA_F16C
3643 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3644 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3645 if (processor_alias_table[i].flags & PTA_RTM
3646 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
3647 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM;
3648 if (processor_alias_table[i].flags & PTA_HLE
3649 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
3650 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_HLE;
3651 if (processor_alias_table[i].flags & PTA_PRFCHW
3652 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
3653 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
3654 if (processor_alias_table[i].flags & PTA_RDSEED
3655 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
3656 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
3657 if (processor_alias_table[i].flags & PTA_ADX
3658 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
3659 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX;
3660 if (processor_alias_table[i].flags & PTA_FXSR
3661 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
3662 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
3663 if (processor_alias_table[i].flags & PTA_XSAVE
3664 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
3665 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
3666 if (processor_alias_table[i].flags & PTA_XSAVEOPT
3667 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
3668 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
3669 if (processor_alias_table[i].flags & PTA_AVX512F
3670 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
3671 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
3672 if (processor_alias_table[i].flags & PTA_AVX512ER
3673 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
3674 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
3675 if (processor_alias_table[i].flags & PTA_AVX512PF
3676 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
3677 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
3678 if (processor_alias_table[i].flags & PTA_AVX512CD
3679 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
3680 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
3681 if (processor_alias_table[i].flags & PTA_PREFETCHWT1
3682 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1))
3683 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1;
3684 if (processor_alias_table[i].flags & PTA_MPX
3685 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MPX))
3686 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MPX;
3687 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3688 x86_prefetch_sse = true;
3690 break;
3693 if (TARGET_X32 && (opts->x_ix86_isa_flags & OPTION_MASK_ISA_MPX))
3694 error ("Intel MPX does not support x32");
3699 if (!strcmp (opts->x_ix86_arch_string, "generic"))
3700 error ("generic CPU can be used only for %stune=%s %s",
3701 prefix, suffix, sw);
3702 else if (!strcmp (opts->x_ix86_arch_string, "intel"))
3703 error ("intel CPU can be used only for %stune=%s %s",
3704 prefix, suffix, sw);
3705 else if (i == pta_size)
3706 error ("bad value (%s) for %sarch=%s %s",
3707 opts->x_ix86_arch_string, prefix, suffix, sw);
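   /* For instance, -march=core2 matches the PTA_CORE2 entry above and so
      enables MMX, SSE, SSE2, SSE3, SSSE3, CX16 and FXSR here, except for any
      ISA the user disabled explicitly (e.g. -mno-ssse3 records
      OPTION_MASK_ISA_SSSE3 in x_ix86_isa_flags_explicit, so it is not
      re-enabled).  */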
3709 ix86_arch_mask = 1u << ix86_arch;
3710 for (i = 0; i < X86_ARCH_LAST; ++i)
3711 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3713 for (i = 0; i < pta_size; i++)
3714 if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name))
3716 ix86_schedule = processor_alias_table[i].schedule;
3717 ix86_tune = processor_alias_table[i].processor;
3718 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3720 if (!(processor_alias_table[i].flags & PTA_64BIT))
3722 if (ix86_tune_defaulted)
3724 opts->x_ix86_tune_string = "x86-64";
3725 for (i = 0; i < pta_size; i++)
3726 if (! strcmp (opts->x_ix86_tune_string,
3727 processor_alias_table[i].name))
3728 break;
3729 ix86_schedule = processor_alias_table[i].schedule;
3730 ix86_tune = processor_alias_table[i].processor;
3732 else
3733 error ("CPU you selected does not support x86-64 "
3734 "instruction set");
3737 /* Intel CPUs have always interpreted SSE prefetch instructions as
3738 NOPs; so, we can enable SSE prefetch instructions even when
3739 -mtune (rather than -march) points us to a processor that has them.
3740 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3741 higher processors. */
3742 if (TARGET_CMOV
3743 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3744 x86_prefetch_sse = true;
3745 break;
3748 if (ix86_tune_specified && i == pta_size)
3749 error ("bad value (%s) for %stune=%s %s",
3750 opts->x_ix86_tune_string, prefix, suffix, sw);
3752 set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes);
3754 #ifndef USE_IX86_FRAME_POINTER
3755 #define USE_IX86_FRAME_POINTER 0
3756 #endif
3758 #ifndef USE_X86_64_FRAME_POINTER
3759 #define USE_X86_64_FRAME_POINTER 0
3760 #endif
3762 /* Set the default values for switches whose default depends on TARGET_64BIT
3763 in case they weren't overwritten by command line options. */
3764 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3766 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3767 opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3768 if (opts->x_flag_asynchronous_unwind_tables
3769 && !opts_set->x_flag_unwind_tables
3770 && TARGET_64BIT_MS_ABI)
3771 opts->x_flag_unwind_tables = 1;
3772 if (opts->x_flag_asynchronous_unwind_tables == 2)
3773 opts->x_flag_unwind_tables
3774 = opts->x_flag_asynchronous_unwind_tables = 1;
3775 if (opts->x_flag_pcc_struct_return == 2)
3776 opts->x_flag_pcc_struct_return = 0;
3778 else
3780 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3781 opts->x_flag_omit_frame_pointer
3782 = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size);
3783 if (opts->x_flag_asynchronous_unwind_tables == 2)
3784 opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3785 if (opts->x_flag_pcc_struct_return == 2)
3786 opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3789 ix86_tune_cost = processor_target_table[ix86_tune].cost;
3790 if (opts->x_optimize_size)
3791 ix86_cost = &ix86_size_cost;
3792 else
3793 ix86_cost = ix86_tune_cost;
3795 /* Arrange to set up i386_stack_locals for all functions. */
3796 init_machine_status = ix86_init_machine_status;
3798 /* Validate -mregparm= value. */
3799 if (opts_set->x_ix86_regparm)
3801 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3802 warning (0, "-mregparm is ignored in 64-bit mode");
3803 if (opts->x_ix86_regparm > REGPARM_MAX)
3805 error ("-mregparm=%d is not between 0 and %d",
3806 opts->x_ix86_regparm, REGPARM_MAX);
3807 opts->x_ix86_regparm = 0;
3810 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3811 opts->x_ix86_regparm = REGPARM_MAX;
3813 /* Default align_* from the processor table. */
3814 if (opts->x_align_loops == 0)
3816 opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
3817 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3819 if (opts->x_align_jumps == 0)
3821 opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
3822 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3824 if (opts->x_align_functions == 0)
3826 opts->x_align_functions = processor_target_table[ix86_tune].align_func;
3829 /* Provide default for -mbranch-cost= value. */
3830 if (!opts_set->x_ix86_branch_cost)
3831 opts->x_ix86_branch_cost = ix86_cost->branch_cost;
3833 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3835 opts->x_target_flags
3836 |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
3838 /* Enable by default the SSE and MMX builtins. Do allow the user to
3839 explicitly disable any of these. In particular, disabling SSE and
3840 MMX for kernel code is extremely useful. */
3841 if (!ix86_arch_specified)
3842 opts->x_ix86_isa_flags
3843 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3844 | TARGET_SUBTARGET64_ISA_DEFAULT)
3845 & ~opts->x_ix86_isa_flags_explicit);
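   /* The "& ~opts->x_ix86_isa_flags_explicit" idiom preserves explicit user
      choices: e.g. with -mno-mmx the MMX bit is recorded in the explicit
      mask, so the default MMX/SSE/SSE2 set added above leaves MMX off.  */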
3847 if (TARGET_RTD_P (opts->x_target_flags))
3848 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3850 else
3852 opts->x_target_flags
3853 |= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags;
3855 if (!ix86_arch_specified)
3856 opts->x_ix86_isa_flags
3857 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
3859 /* i386 ABI does not specify red zone.  It still makes sense to use it
3860 when the programmer takes care to keep the stack from being destroyed.  */
3861 if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE))
3862 opts->x_target_flags |= MASK_NO_RED_ZONE;
3865 /* Keep nonleaf frame pointers. */
3866 if (opts->x_flag_omit_frame_pointer)
3867 opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3868 else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
3869 opts->x_flag_omit_frame_pointer = 1;
3871 /* If we're doing fast math, we don't care about comparison order
3872 wrt NaNs. This lets us use a shorter comparison sequence. */
3873 if (opts->x_flag_finite_math_only)
3874 opts->x_target_flags &= ~MASK_IEEE_FP;
3876 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3877 since the insns won't need emulation. */
3878 if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387])
3879 opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387;
3881 /* Likewise, if the target doesn't have a 387, or we've specified
3882 software floating point, don't use 387 inline intrinsics. */
3883 if (!TARGET_80387_P (opts->x_target_flags))
3884 opts->x_target_flags |= MASK_NO_FANCY_MATH_387;
3886 /* Turn on MMX builtins for -msse. */
3887 if (TARGET_SSE_P (opts->x_ix86_isa_flags))
3888 opts->x_ix86_isa_flags
3889 |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
3891 /* Enable SSE prefetch. */
3892 if (TARGET_SSE_P (opts->x_ix86_isa_flags)
3893 || (TARGET_PRFCHW && !TARGET_3DNOW_P (opts->x_ix86_isa_flags)))
3894 x86_prefetch_sse = true;
3896 /* Enable prefetch{,w} instructions for -m3dnow and -mprefetchwt1. */
3897 if (TARGET_3DNOW_P (opts->x_ix86_isa_flags)
3898 || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags))
3899 opts->x_ix86_isa_flags
3900 |= OPTION_MASK_ISA_PRFCHW & ~opts->x_ix86_isa_flags_explicit;
3902 /* Enable popcnt instruction for -msse4.2 or -mabm. */
3903 if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)
3904 || TARGET_ABM_P (opts->x_ix86_isa_flags))
3905 opts->x_ix86_isa_flags
3906 |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit;
3908 /* Enable lzcnt instruction for -mabm. */
3909 if (TARGET_ABM_P(opts->x_ix86_isa_flags))
3910 opts->x_ix86_isa_flags
3911 |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit;
3913 /* Validate -mpreferred-stack-boundary= value or default it to
3914 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3915 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3916 if (opts_set->x_ix86_preferred_stack_boundary_arg)
3918 int min = (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3919 ? (TARGET_SSE_P (opts->x_ix86_isa_flags) ? 4 : 3) : 2);
3920 int max = (TARGET_SEH ? 4 : 12);
3922 if (opts->x_ix86_preferred_stack_boundary_arg < min
3923 || opts->x_ix86_preferred_stack_boundary_arg > max)
3925 if (min == max)
3926 error ("-mpreferred-stack-boundary is not supported "
3927 "for this target");
3928 else
3929 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
3930 opts->x_ix86_preferred_stack_boundary_arg, min, max);
3932 else
3933 ix86_preferred_stack_boundary
3934 = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
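   /* Worked example: -mpreferred-stack-boundary=4 yields
      (1 << 4) * BITS_PER_UNIT = 16 * 8 = 128 bits, i.e. 16-byte stack
      alignment, the usual minimum for 64-bit SSE targets (min == 4 above).  */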
3937 /* Set the default value for -mstackrealign. */
3938 if (opts->x_ix86_force_align_arg_pointer == -1)
3939 opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3941 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3943 /* Validate -mincoming-stack-boundary= value or default it to
3944 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3945 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3946 if (opts_set->x_ix86_incoming_stack_boundary_arg)
3948 if (opts->x_ix86_incoming_stack_boundary_arg
3949 < (TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2)
3950 || opts->x_ix86_incoming_stack_boundary_arg > 12)
3951 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3952 opts->x_ix86_incoming_stack_boundary_arg,
3953 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2);
3954 else
3956 ix86_user_incoming_stack_boundary
3957 = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
3958 ix86_incoming_stack_boundary
3959 = ix86_user_incoming_stack_boundary;
3963 /* Accept -msseregparm only if at least SSE support is enabled. */
3964 if (TARGET_SSEREGPARM_P (opts->x_target_flags)
3965 && ! TARGET_SSE_P (opts->x_ix86_isa_flags))
3966 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3968 if (opts_set->x_ix86_fpmath)
3970 if (opts->x_ix86_fpmath & FPMATH_SSE)
3972 if (!TARGET_SSE_P (opts->x_ix86_isa_flags))
3974 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3975 opts->x_ix86_fpmath = FPMATH_387;
3977 else if ((opts->x_ix86_fpmath & FPMATH_387)
3978 && !TARGET_80387_P (opts->x_target_flags))
3980 warning (0, "387 instruction set disabled, using SSE arithmetics");
3981 opts->x_ix86_fpmath = FPMATH_SSE;
3985 /* For all chips supporting SSE2, -mfpmath=sse performs better than
3986 -mfpmath=387.  The latter is however the default on many targets, since
3987 the extra 80-bit precision of temporaries is considered part of the ABI.
3988 Overwrite the default at least for -ffast-math.
3989 TODO: -mfpmath=both seems to produce similarly performing code with
3990 slightly smaller binaries.  It is however not clear whether register
3991 allocation is ready for this setting.
3992 Also -mfpmath=387 codegen is overall a lot more compact (about 4-5%)
3993 than SSE codegen.  We may switch to 387 with -ffast-math for size
3994 optimized functions.  */
3995 else if (fast_math_flags_set_p (&global_options)
3996 && TARGET_SSE2_P (opts->x_ix86_isa_flags))
3997 opts->x_ix86_fpmath = FPMATH_SSE;
3998 else
3999 opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags);
4001 /* If the i387 is disabled, then do not return values in it. */
4002 if (!TARGET_80387_P (opts->x_target_flags))
4003 opts->x_target_flags &= ~MASK_FLOAT_RETURNS;
4005 /* Use external vectorized library in vectorizing intrinsics. */
4006 if (opts_set->x_ix86_veclibabi_type)
4007 switch (opts->x_ix86_veclibabi_type)
4009 case ix86_veclibabi_type_svml:
4010 ix86_veclib_handler = ix86_veclibabi_svml;
4011 break;
4013 case ix86_veclibabi_type_acml:
4014 ix86_veclib_handler = ix86_veclibabi_acml;
4015 break;
4017 default:
4018 gcc_unreachable ();
4021 if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS]
4022 && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4023 && !opts->x_optimize_size)
4024 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4026 /* If stack probes are required, the space used for large function
4027 arguments on the stack must also be probed, so enable
4028 -maccumulate-outgoing-args so this happens in the prologue. */
4029 if (TARGET_STACK_PROBE_P (opts->x_target_flags)
4030 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4032 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4033 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
4034 "for correctness", prefix, suffix);
4035 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4038 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
4040 char *p;
4041 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
4042 p = strchr (internal_label_prefix, 'X');
4043 internal_label_prefix_len = p - internal_label_prefix;
4044 *p = '\0';
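   /* Illustration (assuming an ELF-style ASM_GENERATE_INTERNAL_LABEL that
      formats labels as "*.LX0"): the 'X' is found at offset 3, so
      internal_label_prefix becomes "*.L" and internal_label_prefix_len is 3.  */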
4047 /* When the scheduling description is not available, disable the scheduler
4048 pass so it won't slow down compilation and make x87 code slower.  */
4049 if (!TARGET_SCHEDULE)
4050 opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0;
4052 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4053 ix86_tune_cost->simultaneous_prefetches,
4054 opts->x_param_values,
4055 opts_set->x_param_values);
4056 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4057 ix86_tune_cost->prefetch_block,
4058 opts->x_param_values,
4059 opts_set->x_param_values);
4060 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
4061 ix86_tune_cost->l1_cache_size,
4062 opts->x_param_values,
4063 opts_set->x_param_values);
4064 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
4065 ix86_tune_cost->l2_cache_size,
4066 opts->x_param_values,
4067 opts_set->x_param_values);
4069 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful.  */
4070 if (opts->x_flag_prefetch_loop_arrays < 0
4071 && HAVE_prefetch
4072 && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
4073 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
4074 opts->x_flag_prefetch_loop_arrays = 1;
4076 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
4077 can be optimized to ap = __builtin_next_arg (0).  */
4078 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack)
4079 targetm.expand_builtin_va_start = NULL;
4081 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4083 ix86_gen_leave = gen_leave_rex64;
4084 if (Pmode == DImode)
4086 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
4087 ix86_gen_tls_local_dynamic_base_64
4088 = gen_tls_local_dynamic_base_64_di;
4090 else
4092 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
4093 ix86_gen_tls_local_dynamic_base_64
4094 = gen_tls_local_dynamic_base_64_si;
4097 else
4098 ix86_gen_leave = gen_leave;
4100 if (Pmode == DImode)
4102 ix86_gen_add3 = gen_adddi3;
4103 ix86_gen_sub3 = gen_subdi3;
4104 ix86_gen_sub3_carry = gen_subdi3_carry;
4105 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
4106 ix86_gen_andsp = gen_anddi3;
4107 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
4108 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
4109 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
4110 ix86_gen_monitor = gen_sse3_monitor_di;
4112 else
4114 ix86_gen_add3 = gen_addsi3;
4115 ix86_gen_sub3 = gen_subsi3;
4116 ix86_gen_sub3_carry = gen_subsi3_carry;
4117 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
4118 ix86_gen_andsp = gen_andsi3;
4119 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
4120 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
4121 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
4122 ix86_gen_monitor = gen_sse3_monitor_si;
4125 #ifdef USE_IX86_CLD
4126 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4127 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4128 opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags;
4129 #endif
4131 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic)
4133 if (opts->x_flag_fentry > 0)
4134 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4135 "with -fpic");
4136 opts->x_flag_fentry = 0;
4138 else if (TARGET_SEH)
4140 if (opts->x_flag_fentry == 0)
4141 sorry ("-mno-fentry isn%'t compatible with SEH");
4142 opts->x_flag_fentry = 1;
4144 else if (opts->x_flag_fentry < 0)
4146 #if defined(PROFILE_BEFORE_PROLOGUE)
4147 opts->x_flag_fentry = 1;
4148 #else
4149 opts->x_flag_fentry = 0;
4150 #endif
4153 /* When not optimizing for size, enable vzeroupper optimization for
4154 TARGET_AVX with -fexpensive-optimizations and split 32-byte
4155 AVX unaligned load/store.  */
4156 if (!opts->x_optimize_size)
4158 if (flag_expensive_optimizations
4159 && !(opts_set->x_target_flags & MASK_VZEROUPPER))
4160 opts->x_target_flags |= MASK_VZEROUPPER;
4161 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
4162 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
4163 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
4164 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
4165 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
4166 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
4167 /* Enable 128-bit AVX instruction generation
4168 for the auto-vectorizer. */
4169 if (TARGET_AVX128_OPTIMAL
4170 && !(opts_set->x_target_flags & MASK_PREFER_AVX128))
4171 opts->x_target_flags |= MASK_PREFER_AVX128;
4174 if (opts->x_ix86_recip_name)
4176 char *p = ASTRDUP (opts->x_ix86_recip_name);
4177 char *q;
4178 unsigned int mask, i;
4179 bool invert;
4181 while ((q = strtok (p, ",")) != NULL)
4183 p = NULL;
4184 if (*q == '!')
4186 invert = true;
4187 q++;
4189 else
4190 invert = false;
4192 if (!strcmp (q, "default"))
4193 mask = RECIP_MASK_ALL;
4194 else
4196 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4197 if (!strcmp (q, recip_options[i].string))
4199 mask = recip_options[i].mask;
4200 break;
4203 if (i == ARRAY_SIZE (recip_options))
4205 error ("unknown option for -mrecip=%s", q);
4206 invert = false;
4207 mask = RECIP_MASK_NONE;
4211 opts->x_recip_mask_explicit |= mask;
4212 if (invert)
4213 opts->x_recip_mask &= ~mask;
4214 else
4215 opts->x_recip_mask |= mask;
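   /* Example (illustrative): -mrecip=all,!sqrt first sets every RECIP_MASK_*
      bit via "all", then the '!'-prefixed "sqrt" token clears RECIP_MASK_SQRT,
      so reciprocal approximations stay enabled everywhere except scalar
      square roots.  */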
4219 if (TARGET_RECIP_P (opts->x_target_flags))
4220 opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit;
4221 else if (opts_set->x_target_flags & MASK_RECIP)
4222 opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit);
4224 /* Default long double to 64-bit for 32-bit Bionic and to __float128
4225 for 64-bit Bionic. */
4226 if (TARGET_HAS_BIONIC
4227 && !(opts_set->x_target_flags
4228 & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128)))
4229 opts->x_target_flags |= (TARGET_64BIT
4230 ? MASK_LONG_DOUBLE_128
4231 : MASK_LONG_DOUBLE_64);
4233 /* Only one of them can be active. */
4234 gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0
4235 || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0);
4237 /* Save the initial options in case the user does function specific
4238 options. */
4239 if (main_args_p)
4240 target_option_default_node = target_option_current_node
4241 = build_target_option_node (opts);
4243 /* Handle stack protector */
4244 if (!opts_set->x_ix86_stack_protector_guard)
4245 opts->x_ix86_stack_protector_guard
4246 = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
4248 /* Handle -mmemcpy-strategy= and -mmemset-strategy= */
4249 if (opts->x_ix86_tune_memcpy_strategy)
4251 char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy);
4252 ix86_parse_stringop_strategy_string (str, false);
4253 free (str);
4256 if (opts->x_ix86_tune_memset_strategy)
4258 char *str = xstrdup (opts->x_ix86_tune_memset_strategy);
4259 ix86_parse_stringop_strategy_string (str, true);
4260 free (str);
4264 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4266 static void
4267 ix86_option_override (void)
4269 opt_pass *pass_insert_vzeroupper = make_pass_insert_vzeroupper (g);
4270 static struct register_pass_info insert_vzeroupper_info
4271 = { pass_insert_vzeroupper, "reload",
4272 1, PASS_POS_INSERT_AFTER
4275 ix86_option_override_internal (true, &global_options, &global_options_set);
4278 /* This needs to be done at start up. It's convenient to do it here. */
4279 register_pass (&insert_vzeroupper_info);
4282 /* Update register usage after having seen the compiler flags. */
4284 static void
4285 ix86_conditional_register_usage (void)
4287 int i, c_mask;
4288 unsigned int j;
4290 /* The PIC register, if it exists, is fixed. */
4291 j = PIC_OFFSET_TABLE_REGNUM;
4292 if (j != INVALID_REGNUM)
4293 fixed_regs[j] = call_used_regs[j] = 1;
4295 /* For 32-bit targets, squash the REX registers. */
4296 if (! TARGET_64BIT)
4298 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4299 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4300 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4301 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4302 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4303 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4306 /* See the definition of CALL_USED_REGISTERS in i386.h. */
4307 c_mask = (TARGET_64BIT_MS_ABI ? (1 << 3)
4308 : TARGET_64BIT ? (1 << 2)
4309 : (1 << 1));
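   /* With this encoding, a CALL_USED_REGISTERS entry greater than 1 selects
      the ABIs in which the register is call-used: e.g. a value of
      (1 << 2) | (1 << 3) marks a register call-used under both 64-bit ABIs
      but call-saved in 32-bit mode; the masking below collapses it to 0/1.  */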
4311 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4313 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4315 /* Set/reset conditionally defined registers from
4316 CALL_USED_REGISTERS initializer. */
4317 if (call_used_regs[i] > 1)
4318 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
4320 /* Compute the CLOBBERED_REGS register set as the call-used
4321 registers taken from the GENERAL_REGS register set.  */
4322 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4323 && call_used_regs[i])
4324 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
4327 /* If MMX is disabled, squash the registers. */
4328 if (! TARGET_MMX)
4329 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4330 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4331 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4333 /* If SSE is disabled, squash the registers. */
4334 if (! TARGET_SSE)
4335 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4336 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4337 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4339 /* If the FPU is disabled, squash the registers. */
4340 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4341 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4342 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4343 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4345 /* If AVX512F is disabled, squash the registers. */
4346 if (! TARGET_AVX512F)
4348 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4349 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4351 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
4352 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4355 /* If MPX is disabled, squash the registers. */
4356 if (! TARGET_MPX)
4357 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
4358 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4362 /* Save the current options */
4364 static void
4365 ix86_function_specific_save (struct cl_target_option *ptr,
4366 struct gcc_options *opts)
4368 ptr->arch = ix86_arch;
4369 ptr->schedule = ix86_schedule;
4370 ptr->tune = ix86_tune;
4371 ptr->branch_cost = ix86_branch_cost;
4372 ptr->tune_defaulted = ix86_tune_defaulted;
4373 ptr->arch_specified = ix86_arch_specified;
4374 ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
4375 ptr->x_ix86_target_flags_explicit = opts->x_ix86_target_flags_explicit;
4376 ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
4377 ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
4378 ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
4379 ptr->x_ix86_cmodel = opts->x_ix86_cmodel;
4380 ptr->x_ix86_abi = opts->x_ix86_abi;
4381 ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect;
4382 ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost;
4383 ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes;
4384 ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer;
4385 ptr->x_ix86_force_drap = opts->x_ix86_force_drap;
4386 ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg;
4387 ptr->x_ix86_pmode = opts->x_ix86_pmode;
4388 ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg;
4389 ptr->x_ix86_recip_name = opts->x_ix86_recip_name;
4390 ptr->x_ix86_regparm = opts->x_ix86_regparm;
4391 ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold;
4392 ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx;
4393 ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard;
4394 ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg;
4395 ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect;
4396 ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string;
4397 ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy;
4398 ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy;
4399 ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default;
4400 ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type;
4402 /* The fields are char but the variables are not; make sure the
4403 values fit in the fields. */
4404 gcc_assert (ptr->arch == ix86_arch);
4405 gcc_assert (ptr->schedule == ix86_schedule);
4406 gcc_assert (ptr->tune == ix86_tune);
4407 gcc_assert (ptr->branch_cost == ix86_branch_cost);
4410 /* Restore the current options */
4412 static void
4413 ix86_function_specific_restore (struct gcc_options *opts,
4414 struct cl_target_option *ptr)
4416 enum processor_type old_tune = ix86_tune;
4417 enum processor_type old_arch = ix86_arch;
4418 unsigned int ix86_arch_mask;
4419 int i;
4421 /* We don't change -fPIC. */
4422 opts->x_flag_pic = flag_pic;
4424 ix86_arch = (enum processor_type) ptr->arch;
4425 ix86_schedule = (enum attr_cpu) ptr->schedule;
4426 ix86_tune = (enum processor_type) ptr->tune;
4427 opts->x_ix86_branch_cost = ptr->branch_cost;
4428 ix86_tune_defaulted = ptr->tune_defaulted;
4429 ix86_arch_specified = ptr->arch_specified;
4430 opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
4431 opts->x_ix86_target_flags_explicit = ptr->x_ix86_target_flags_explicit;
4432 opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
4433 opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
4434 opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
4435 opts->x_ix86_cmodel = ptr->x_ix86_cmodel;
4436 opts->x_ix86_abi = ptr->x_ix86_abi;
4437 opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect;
4438 opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost;
4439 opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes;
4440 opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer;
4441 opts->x_ix86_force_drap = ptr->x_ix86_force_drap;
4442 opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg;
4443 opts->x_ix86_pmode = ptr->x_ix86_pmode;
4444 opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg;
4445 opts->x_ix86_recip_name = ptr->x_ix86_recip_name;
4446 opts->x_ix86_regparm = ptr->x_ix86_regparm;
4447 opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold;
4448 opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx;
4449 opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard;
4450 opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg;
4451 opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect;
4452 opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string;
4453 opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy;
4454 opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy;
4455 opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default;
4456 opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type;
4458 /* Recreate the arch feature tests if the arch changed */
4459 if (old_arch != ix86_arch)
4461 ix86_arch_mask = 1u << ix86_arch;
4462 for (i = 0; i < X86_ARCH_LAST; ++i)
4463 ix86_arch_features[i]
4464 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4467 /* Recreate the tune optimization tests */
4468 if (old_tune != ix86_tune)
4469 set_ix86_tune_features (ix86_tune, false);
4472 /* Print the current options */
4474 static void
4475 ix86_function_specific_print (FILE *file, int indent,
4476 struct cl_target_option *ptr)
4478 char *target_string
4479 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4480 NULL, NULL, ptr->x_ix86_fpmath, false);
4482 gcc_assert (ptr->arch < PROCESSOR_max);
4483 fprintf (file, "%*sarch = %d (%s)\n",
4484 indent, "",
4485 ptr->arch, processor_target_table[ptr->arch].name);
4487 gcc_assert (ptr->tune < PROCESSOR_max);
4488 fprintf (file, "%*stune = %d (%s)\n",
4489 indent, "",
4490 ptr->tune, processor_target_table[ptr->tune].name);
4492 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4494 if (target_string)
4496 fprintf (file, "%*s%s\n", indent, "", target_string);
4497 free (target_string);
4502 /* Inner function to process the attribute((target(...))), take an argument and
4503 set the current options from the argument. If we have a list, recursively go
4504 over the list. */
4506 static bool
4507 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
4508 struct gcc_options *opts,
4509 struct gcc_options *opts_set,
4510 struct gcc_options *enum_opts_set)
4512 char *next_optstr;
4513 bool ret = true;
4515 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4516 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4517 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4518 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4519 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4521 enum ix86_opt_type
4523 ix86_opt_unknown,
4524 ix86_opt_yes,
4525 ix86_opt_no,
4526 ix86_opt_str,
4527 ix86_opt_enum,
4528 ix86_opt_isa
4531 static const struct
4533 const char *string;
4534 size_t len;
4535 enum ix86_opt_type type;
4536 int opt;
4537 int mask;
4538 } attrs[] = {
4539 /* isa options */
4540 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4541 IX86_ATTR_ISA ("abm", OPT_mabm),
4542 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4543 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
4544 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
4545 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4546 IX86_ATTR_ISA ("aes", OPT_maes),
4547 IX86_ATTR_ISA ("sha", OPT_msha),
4548 IX86_ATTR_ISA ("avx", OPT_mavx),
4549 IX86_ATTR_ISA ("avx2", OPT_mavx2),
4550 IX86_ATTR_ISA ("avx512f", OPT_mavx512f),
4551 IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf),
4552 IX86_ATTR_ISA ("avx512er", OPT_mavx512er),
4553 IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd),
4554 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4555 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4556 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4557 IX86_ATTR_ISA ("sse", OPT_msse),
4558 IX86_ATTR_ISA ("sse2", OPT_msse2),
4559 IX86_ATTR_ISA ("sse3", OPT_msse3),
4560 IX86_ATTR_ISA ("sse4", OPT_msse4),
4561 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4562 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4563 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4564 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4565 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4566 IX86_ATTR_ISA ("fma", OPT_mfma),
4567 IX86_ATTR_ISA ("xop", OPT_mxop),
4568 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4569 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4570 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4571 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4572 IX86_ATTR_ISA ("rtm", OPT_mrtm),
4573 IX86_ATTR_ISA ("hle", OPT_mhle),
4574 IX86_ATTR_ISA ("prfchw", OPT_mprfchw),
4575 IX86_ATTR_ISA ("rdseed", OPT_mrdseed),
4576 IX86_ATTR_ISA ("adx", OPT_madx),
4577 IX86_ATTR_ISA ("fxsr", OPT_mfxsr),
4578 IX86_ATTR_ISA ("xsave", OPT_mxsave),
4579 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt),
4580 IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1),
4582 /* enum options */
4583 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
4585 /* string options */
4586 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4587 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4589 /* flag options */
4590 IX86_ATTR_YES ("cld",
4591 OPT_mcld,
4592 MASK_CLD),
4594 IX86_ATTR_NO ("fancy-math-387",
4595 OPT_mfancy_math_387,
4596 MASK_NO_FANCY_MATH_387),
4598 IX86_ATTR_YES ("ieee-fp",
4599 OPT_mieee_fp,
4600 MASK_IEEE_FP),
4602 IX86_ATTR_YES ("inline-all-stringops",
4603 OPT_minline_all_stringops,
4604 MASK_INLINE_ALL_STRINGOPS),
4606 IX86_ATTR_YES ("inline-stringops-dynamically",
4607 OPT_minline_stringops_dynamically,
4608 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4610 IX86_ATTR_NO ("align-stringops",
4611 OPT_mno_align_stringops,
4612 MASK_NO_ALIGN_STRINGOPS),
4614 IX86_ATTR_YES ("recip",
4615 OPT_mrecip,
4616 MASK_RECIP),
4620 /* If this is a list, recurse to get the options. */
4621 if (TREE_CODE (args) == TREE_LIST)
4623 bool ret = true;
4625 for (; args; args = TREE_CHAIN (args))
4626 if (TREE_VALUE (args)
4627 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4628 p_strings, opts, opts_set,
4629 enum_opts_set))
4630 ret = false;
4632 return ret;
4635 else if (TREE_CODE (args) != STRING_CST)
4637 error ("attribute %<target%> argument not a string");
4638 return false;
4641 /* Handle multiple arguments separated by commas. */
4642 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4644 while (next_optstr && *next_optstr != '\0')
4646 char *p = next_optstr;
4647 char *orig_p = p;
4648 char *comma = strchr (next_optstr, ',');
4649 const char *opt_string;
4650 size_t len, opt_len;
4651 int opt;
4652 bool opt_set_p;
4653 char ch;
4654 unsigned i;
4655 enum ix86_opt_type type = ix86_opt_unknown;
4656 int mask = 0;
4658 if (comma)
4660 *comma = '\0';
4661 len = comma - next_optstr;
4662 next_optstr = comma + 1;
4664 else
4666 len = strlen (p);
4667 next_optstr = NULL;
4670 /* Recognize no-xxx. */
4671 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4673 opt_set_p = false;
4674 p += 3;
4675 len -= 3;
4677 else
4678 opt_set_p = true;
4680 /* Find the option. */
4681 ch = *p;
4682 opt = N_OPTS;
4683 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4685 type = attrs[i].type;
4686 opt_len = attrs[i].len;
4687 if (ch == attrs[i].string[0]
4688 && ((type != ix86_opt_str && type != ix86_opt_enum)
4689 ? len == opt_len
4690 : len > opt_len)
4691 && memcmp (p, attrs[i].string, opt_len) == 0)
4693 opt = attrs[i].opt;
4694 mask = attrs[i].mask;
4695 opt_string = attrs[i].string;
4696 break;
4700 /* Process the option. */
4701 if (opt == N_OPTS)
4703 error ("attribute(target(\"%s\")) is unknown", orig_p);
4704 ret = false;
4707 else if (type == ix86_opt_isa)
4709 struct cl_decoded_option decoded;
4711 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4712 ix86_handle_option (opts, opts_set,
4713 &decoded, input_location);
4716 else if (type == ix86_opt_yes || type == ix86_opt_no)
4718 if (type == ix86_opt_no)
4719 opt_set_p = !opt_set_p;
4721 if (opt_set_p)
4722 opts->x_target_flags |= mask;
4723 else
4724 opts->x_target_flags &= ~mask;
4727 else if (type == ix86_opt_str)
4729 if (p_strings[opt])
4731 error ("option(\"%s\") was already specified", opt_string);
4732 ret = false;
4734 else
4735 p_strings[opt] = xstrdup (p + opt_len);
4738 else if (type == ix86_opt_enum)
4740 bool arg_ok;
4741 int value;
4743 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
4744 if (arg_ok)
4745 set_option (opts, enum_opts_set, opt, value,
4746 p + opt_len, DK_UNSPECIFIED, input_location,
4747 global_dc);
4748 else
4750 error ("attribute(target(\"%s\")) is unknown", orig_p);
4751 ret = false;
4755 else
4756 gcc_unreachable ();
4759 return ret;
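/* Illustrative walk-through (an assumed example, not taken from the code
   above): for a declaration such as

     void f (void) __attribute__ ((target ("no-sse4.1,arch=atom")));

   the string is split at the comma; "no-sse4.1" loses its "no-" prefix,
   matches the ix86_opt_isa entry for "sse4.1" and clears the corresponding
   ISA flag through ix86_handle_option, while "arch=atom" matches the
   ix86_opt_str entry for "arch=" and is saved in p_strings[] for
   ix86_option_override_internal to act on later.  */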
4762 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4764 tree
4765 ix86_valid_target_attribute_tree (tree args,
4766 struct gcc_options *opts,
4767 struct gcc_options *opts_set)
4769 const char *orig_arch_string = opts->x_ix86_arch_string;
4770 const char *orig_tune_string = opts->x_ix86_tune_string;
4771 enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
4772 int orig_tune_defaulted = ix86_tune_defaulted;
4773 int orig_arch_specified = ix86_arch_specified;
4774 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
4775 tree t = NULL_TREE;
4776 int i;
4777 struct cl_target_option *def
4778 = TREE_TARGET_OPTION (target_option_default_node);
4779 struct gcc_options enum_opts_set;
4781 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
4783 /* Process each of the options on the chain. */
4784 if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts,
4785 opts_set, &enum_opts_set))
4786 return error_mark_node;
4788 /* If the changed options are different from the default, rerun
4789 ix86_option_override_internal, and then save the options away.
4790 The string options are attribute options, and will be undone
4791 when we copy the save structure. */
4792 if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags
4793 || opts->x_target_flags != def->x_target_flags
4794 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4795 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4796 || enum_opts_set.x_ix86_fpmath)
4798 /* If we are using the default tune= or arch=, undo the string assigned,
4799 and use the default. */
4800 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4801 opts->x_ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4802 else if (!orig_arch_specified)
4803 opts->x_ix86_arch_string = NULL;
4805 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4806 opts->x_ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4807 else if (orig_tune_defaulted)
4808 opts->x_ix86_tune_string = NULL;
4810 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4811 if (enum_opts_set.x_ix86_fpmath)
4812 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4813 else if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
4814 && TARGET_SSE_P (opts->x_ix86_isa_flags))
4816 opts->x_ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
4817 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4820 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4821 ix86_option_override_internal (false, opts, opts_set);
4823 /* Add any builtin functions with the new isa if any. */
4824 ix86_add_new_builtins (opts->x_ix86_isa_flags);
4826 /* Save the current options unless we are validating options for
4827 #pragma. */
4828 t = build_target_option_node (opts);
4830 opts->x_ix86_arch_string = orig_arch_string;
4831 opts->x_ix86_tune_string = orig_tune_string;
4832 opts_set->x_ix86_fpmath = orig_fpmath_set;
4834 /* Free up memory allocated to hold the strings */
4835 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4836 free (option_strings[i]);
4839 return t;
4842 /* Hook to validate attribute((target("string"))). */
4844 static bool
4845 ix86_valid_target_attribute_p (tree fndecl,
4846 tree ARG_UNUSED (name),
4847 tree args,
4848 int ARG_UNUSED (flags))
4850 struct gcc_options func_options;
4851 tree new_target, new_optimize;
4852 bool ret = true;
4854 /* attribute((target("default"))) does nothing, beyond
4855 affecting multi-versioning. */
4856 if (TREE_VALUE (args)
4857 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
4858 && TREE_CHAIN (args) == NULL_TREE
4859 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
4860 return true;
4862 tree old_optimize = build_optimization_node (&global_options);
4864 /* Get the optimization options of the current function. */
4865 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4867 if (!func_optimize)
4868 func_optimize = old_optimize;
4870 /* Init func_options. */
4871 memset (&func_options, 0, sizeof (func_options));
4872 init_options_struct (&func_options, NULL);
4873 lang_hooks.init_options_struct (&func_options);
4875 cl_optimization_restore (&func_options,
4876 TREE_OPTIMIZATION (func_optimize));
4878 /* Initialize func_options to the default before its target options can
4879 be set. */
4880 cl_target_option_restore (&func_options,
4881 TREE_TARGET_OPTION (target_option_default_node));
4883 new_target = ix86_valid_target_attribute_tree (args, &func_options,
4884 &global_options_set);
4886 new_optimize = build_optimization_node (&func_options);
4888 if (new_target == error_mark_node)
4889 ret = false;
4891 else if (fndecl && new_target)
4893 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4895 if (old_optimize != new_optimize)
4896 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4899 return ret;
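/* Usage sketch (assumed source, function multi-versioning is C++-only at
   this point):

     __attribute__ ((target ("default"))) int foo () { return 0; }
     __attribute__ ((target ("avx2")))    int foo () { return 1; }

   The "default" version passes through the early return above unchanged;
   selecting between the versions at run time is handled by the
   multi-versioning machinery (typically an ifunc resolver), not by this
   hook.  */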
4903 /* Hook to determine if one function can safely inline another. */
4905 static bool
4906 ix86_can_inline_p (tree caller, tree callee)
4908 bool ret = false;
4909 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4910 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4912 /* If callee has no option attributes, then it is ok to inline. */
4913 if (!callee_tree)
4914 ret = true;
4916 /* If caller has no option attributes, but callee does then it is not ok to
4917 inline. */
4918 else if (!caller_tree)
4919 ret = false;
4921 else
4923 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4924 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
4926 /* Callee's isa options should be a subset of the caller's, i.e. an SSE4
4927 function can inline an SSE2 function, but an SSE2 function can't inline
4928 an SSE4 function. */
4929 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
4930 != callee_opts->x_ix86_isa_flags)
4931 ret = false;
4933 /* See if we have the same non-isa options. */
4934 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
4935 ret = false;
4937 /* See if arch, tune, etc. are the same. */
4938 else if (caller_opts->arch != callee_opts->arch)
4939 ret = false;
4941 else if (caller_opts->tune != callee_opts->tune)
4942 ret = false;
4944 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
4945 ret = false;
4947 else if (caller_opts->branch_cost != callee_opts->branch_cost)
4948 ret = false;
4950 else
4951 ret = true;
4954 return ret;
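/* Example of the subset rule (assumed code, not from the sources):

     __attribute__ ((target ("sse2")))   static int helper (void) { ... }
     __attribute__ ((target ("sse4.2"))) int wrapper (void)
     { return helper (); }    /- OK: sse2 flags are a subset of sse4.2 -/

   The reverse case, an sse2 caller trying to inline an sse4.2 callee, is
   rejected above because the callee's ISA flags are not a subset of the
   caller's.  */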
4958 /* Remember the last target of ix86_set_current_function. */
4959 static GTY(()) tree ix86_previous_fndecl;
4961 /* Invalidate ix86_previous_fndecl cache. */
4962 void
4963 ix86_reset_previous_fndecl (void)
4965 ix86_previous_fndecl = NULL_TREE;
4968 /* Establish appropriate back-end context for processing the function
4969 FNDECL. The argument might be NULL to indicate processing at top
4970 level, outside of any function scope. */
4971 static void
4972 ix86_set_current_function (tree fndecl)
4974 /* Only change the context if the function changes. This hook is called
4975 several times in the course of compiling a function, and we don't want to
4976 slow things down too much or call target_reinit when it isn't safe. */
4977 if (fndecl && fndecl != ix86_previous_fndecl)
4979 tree old_tree = (ix86_previous_fndecl
4980 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
4981 : NULL_TREE);
4983 tree new_tree = (fndecl
4984 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
4985 : NULL_TREE);
4987 ix86_previous_fndecl = fndecl;
4988 if (old_tree == new_tree)
4991 else if (new_tree)
4993 cl_target_option_restore (&global_options,
4994 TREE_TARGET_OPTION (new_tree));
4995 if (TREE_TARGET_GLOBALS (new_tree))
4996 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
4997 else
4998 TREE_TARGET_GLOBALS (new_tree)
4999 = save_target_globals_default_opts ();
5002 else if (old_tree)
5004 new_tree = target_option_current_node;
5005 cl_target_option_restore (&global_options,
5006 TREE_TARGET_OPTION (new_tree));
5007 if (TREE_TARGET_GLOBALS (new_tree))
5008 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5009 else if (new_tree == target_option_default_node)
5010 restore_target_globals (&default_target_globals);
5011 else
5012 TREE_TARGET_GLOBALS (new_tree)
5013 = save_target_globals_default_opts ();
5019 /* Return true if this goes in large data/bss. */
5021 static bool
5022 ix86_in_large_data_p (tree exp)
5024 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
5025 return false;
5027 /* Functions are never large data. */
5028 if (TREE_CODE (exp) == FUNCTION_DECL)
5029 return false;
5031 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
5033 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
5034 if (strcmp (section, ".ldata") == 0
5035 || strcmp (section, ".lbss") == 0)
5036 return true;
5037 return false;
5039 else
5041 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
5043 /* If this is an incomplete type with size 0, then we can't put it
5044 in data because it might be too big when completed. */
5045 if (!size || size > ix86_section_threshold)
5046 return true;
5049 return false;
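/* Rough example: with -mcmodel=medium and the default
   -mlarge-data-threshold (64 KiB), a definition such as

     static char big_buf[1 << 20];     (1 MiB, above the threshold)

   is treated as large data and ends up in .lbss, while small objects stay
   in the ordinary .data/.bss sections, where they remain addressable with
   32-bit relocations.  */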
5052 /* Switch to the appropriate section for output of DECL.
5053 DECL is either a `VAR_DECL' node or a constant of some sort.
5054 RELOC indicates whether forming the initial value of DECL requires
5055 link-time relocations. */
5057 ATTRIBUTE_UNUSED static section *
5058 x86_64_elf_select_section (tree decl, int reloc,
5059 unsigned HOST_WIDE_INT align)
5061 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5062 && ix86_in_large_data_p (decl))
5064 const char *sname = NULL;
5065 unsigned int flags = SECTION_WRITE;
5066 switch (categorize_decl_for_section (decl, reloc))
5068 case SECCAT_DATA:
5069 sname = ".ldata";
5070 break;
5071 case SECCAT_DATA_REL:
5072 sname = ".ldata.rel";
5073 break;
5074 case SECCAT_DATA_REL_LOCAL:
5075 sname = ".ldata.rel.local";
5076 break;
5077 case SECCAT_DATA_REL_RO:
5078 sname = ".ldata.rel.ro";
5079 break;
5080 case SECCAT_DATA_REL_RO_LOCAL:
5081 sname = ".ldata.rel.ro.local";
5082 break;
5083 case SECCAT_BSS:
5084 sname = ".lbss";
5085 flags |= SECTION_BSS;
5086 break;
5087 case SECCAT_RODATA:
5088 case SECCAT_RODATA_MERGE_STR:
5089 case SECCAT_RODATA_MERGE_STR_INIT:
5090 case SECCAT_RODATA_MERGE_CONST:
5091 sname = ".lrodata";
5092 flags = 0;
5093 break;
5094 case SECCAT_SRODATA:
5095 case SECCAT_SDATA:
5096 case SECCAT_SBSS:
5097 gcc_unreachable ();
5098 case SECCAT_TEXT:
5099 case SECCAT_TDATA:
5100 case SECCAT_TBSS:
5101 /* We don't split these for the medium model. Place them into
5102 default sections and hope for the best. */
5103 break;
5105 if (sname)
5107 /* We might get called with string constants, but get_named_section
5108 doesn't like them as they are not DECLs. Also, we need to set
5109 flags in that case. */
5110 if (!DECL_P (decl))
5111 return get_section (sname, flags, NULL);
5112 return get_named_section (decl, sname, reloc);
5115 return default_elf_select_section (decl, reloc, align);
5118 /* Select a set of attributes for section NAME based on the properties
5119 of DECL and whether or not RELOC indicates that DECL's initializer
5120 might contain runtime relocations. */
5122 static unsigned int ATTRIBUTE_UNUSED
5123 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
5125 unsigned int flags = default_section_type_flags (decl, name, reloc);
5127 if (decl == NULL_TREE
5128 && (strcmp (name, ".ldata.rel.ro") == 0
5129 || strcmp (name, ".ldata.rel.ro.local") == 0))
5130 flags |= SECTION_RELRO;
5132 if (strcmp (name, ".lbss") == 0
5133 || strncmp (name, ".lbss.", 6) == 0
5134 || strncmp (name, ".gnu.linkonce.lb.", 17) == 0)
5135 flags |= SECTION_BSS;
5137 return flags;
5140 /* Build up a unique section name, expressed as a
5141 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
5142 RELOC indicates whether the initial value of EXP requires
5143 link-time relocations. */
5145 static void ATTRIBUTE_UNUSED
5146 x86_64_elf_unique_section (tree decl, int reloc)
5148 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5149 && ix86_in_large_data_p (decl))
5151 const char *prefix = NULL;
5152 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
5153 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
5155 switch (categorize_decl_for_section (decl, reloc))
5157 case SECCAT_DATA:
5158 case SECCAT_DATA_REL:
5159 case SECCAT_DATA_REL_LOCAL:
5160 case SECCAT_DATA_REL_RO:
5161 case SECCAT_DATA_REL_RO_LOCAL:
5162 prefix = one_only ? ".ld" : ".ldata";
5163 break;
5164 case SECCAT_BSS:
5165 prefix = one_only ? ".lb" : ".lbss";
5166 break;
5167 case SECCAT_RODATA:
5168 case SECCAT_RODATA_MERGE_STR:
5169 case SECCAT_RODATA_MERGE_STR_INIT:
5170 case SECCAT_RODATA_MERGE_CONST:
5171 prefix = one_only ? ".lr" : ".lrodata";
5172 break;
5173 case SECCAT_SRODATA:
5174 case SECCAT_SDATA:
5175 case SECCAT_SBSS:
5176 gcc_unreachable ();
5177 case SECCAT_TEXT:
5178 case SECCAT_TDATA:
5179 case SECCAT_TBSS:
5180 /* We don't split these for the medium model. Place them into
5181 default sections and hope for the best. */
5182 break;
5184 if (prefix)
5186 const char *name, *linkonce;
5187 char *string;
5189 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
5190 name = targetm.strip_name_encoding (name);
5192 /* If we're using one_only, then there needs to be a .gnu.linkonce
5193 prefix to the section name. */
5194 linkonce = one_only ? ".gnu.linkonce" : "";
5196 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
5198 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
5199 return;
5202 default_unique_section (decl, reloc);
5205 #ifdef COMMON_ASM_OP
5206 /* This says how to output assembler code to declare an
5207 uninitialized external linkage data object.
5209 For the medium model on x86-64 we need to use the .largecomm directive for
5210 large objects. */
5211 void
5212 x86_elf_aligned_common (FILE *file,
5213 const char *name, unsigned HOST_WIDE_INT size,
5214 int align)
5216 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5217 && size > (unsigned int)ix86_section_threshold)
5218 fputs (".largecomm\t", file);
5219 else
5220 fputs (COMMON_ASM_OP, file);
5221 assemble_name (file, name);
5222 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
5223 size, align / BITS_PER_UNIT);
5225 #endif
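/* Assumed assembler output for the routine above: with -mcmodel=medium an
   uninitialized common symbol bigger than the section threshold, e.g.

     char scratch[100000];

   is announced as something like

     .largecomm scratch,100000,32

   whereas smaller commons keep the usual ".comm name,size,align" form.  */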
5227 /* Utility function for targets to use in implementing
5228 ASM_OUTPUT_ALIGNED_BSS. */
5230 void
5231 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
5232 const char *name, unsigned HOST_WIDE_INT size,
5233 int align)
5235 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5236 && size > (unsigned int)ix86_section_threshold)
5237 switch_to_section (get_named_section (decl, ".lbss", 0));
5238 else
5239 switch_to_section (bss_section);
5240 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
5241 #ifdef ASM_DECLARE_OBJECT_NAME
5242 last_assemble_variable_decl = decl;
5243 ASM_DECLARE_OBJECT_NAME (file, name, decl);
5244 #else
5245 /* Standard thing is just output label for the object. */
5246 ASM_OUTPUT_LABEL (file, name);
5247 #endif /* ASM_DECLARE_OBJECT_NAME */
5248 ASM_OUTPUT_SKIP (file, size ? size : 1);
5251 /* Decide whether we must probe the stack before any space allocation
5252 on this target. It's essentially TARGET_STACK_PROBE except when
5253 -fstack-check causes the stack to be already probed differently. */
5255 bool
5256 ix86_target_stack_probe (void)
5258 /* Do not probe the stack twice if static stack checking is enabled. */
5259 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5260 return false;
5262 return TARGET_STACK_PROBE;
5265 /* Decide whether we can make a sibling call to a function. DECL is the
5266 declaration of the function being targeted by the call and EXP is the
5267 CALL_EXPR representing the call. */
5269 static bool
5270 ix86_function_ok_for_sibcall (tree decl, tree exp)
5272 tree type, decl_or_type;
5273 rtx a, b;
5275 /* If we are generating position-independent code, we cannot sibcall
5276 optimize any indirect call, or a direct call to a global function,
5277 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
5278 if (!TARGET_MACHO
5279 && !TARGET_64BIT
5280 && flag_pic
5281 && (!decl || !targetm.binds_local_p (decl)))
5282 return false;
5284 /* If we need to align the outgoing stack, then sibcalling would
5285 unalign the stack, which may break the called function. */
5286 if (ix86_minimum_incoming_stack_boundary (true)
5287 < PREFERRED_STACK_BOUNDARY)
5288 return false;
5290 if (decl)
5292 decl_or_type = decl;
5293 type = TREE_TYPE (decl);
5295 else
5297 /* We're looking at the CALL_EXPR, we need the type of the function. */
5298 type = CALL_EXPR_FN (exp); /* pointer expression */
5299 type = TREE_TYPE (type); /* pointer type */
5300 type = TREE_TYPE (type); /* function type */
5301 decl_or_type = type;
5304 /* Check that the return value locations are the same. Like
5305 if we are returning floats on the 80387 register stack, we cannot
5306 make a sibcall from a function that doesn't return a float to a
5307 function that does or, conversely, from a function that does return
5308 a float to a function that doesn't; the necessary stack adjustment
5309 would not be executed. This is also the place we notice
5310 differences in the return value ABI. Note that it is ok for one
5311 of the functions to have void return type as long as the return
5312 value of the other is passed in a register. */
5313 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
5314 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5315 cfun->decl, false);
5316 if (STACK_REG_P (a) || STACK_REG_P (b))
5318 if (!rtx_equal_p (a, b))
5319 return false;
5321 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5323 else if (!rtx_equal_p (a, b))
5324 return false;
5326 if (TARGET_64BIT)
5328 /* The SYSV ABI has more call-clobbered registers;
5329 disallow sibcalls from MS to SYSV. */
5330 if (cfun->machine->call_abi == MS_ABI
5331 && ix86_function_type_abi (type) == SYSV_ABI)
5332 return false;
5334 else
5336 /* If this call is indirect, we'll need to be able to use a
5337 call-clobbered register for the address of the target function.
5338 Make sure that all such registers are not used for passing
5339 parameters. Note that DLLIMPORT functions are indirect. */
5340 if (!decl
5341 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
5343 if (ix86_function_regparm (type, NULL) >= 3)
5345 /* ??? Need to count the actual number of registers to be used,
5346 not the possible number of registers. Fix later. */
5347 return false;
5352 /* Otherwise okay. That also includes certain types of indirect calls. */
5353 return true;
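/* Example of the PIC restriction above (assumed code): compiling

     extern int g (int);
     int f (int x) { return g (x); }

   with -m32 -fPIC -O2 does not turn the call into a sibcall jump, because
   the PLT entry needs %ebx to hold the GOT pointer and a sibcall would not
   keep it set up; the same function built without -fPIC can typically be
   tail-called.  */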
5356 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5357 and "sseregparm" calling convention attributes;
5358 arguments as in struct attribute_spec.handler. */
5360 static tree
5361 ix86_handle_cconv_attribute (tree *node, tree name,
5362 tree args,
5363 int flags ATTRIBUTE_UNUSED,
5364 bool *no_add_attrs)
5366 if (TREE_CODE (*node) != FUNCTION_TYPE
5367 && TREE_CODE (*node) != METHOD_TYPE
5368 && TREE_CODE (*node) != FIELD_DECL
5369 && TREE_CODE (*node) != TYPE_DECL)
5371 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5372 name);
5373 *no_add_attrs = true;
5374 return NULL_TREE;
5377 /* Can combine regparm with all attributes except fastcall and thiscall. */
5378 if (is_attribute_p ("regparm", name))
5380 tree cst;
5382 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5384 error ("fastcall and regparm attributes are not compatible");
5387 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5389 error ("regparam and thiscall attributes are not compatible");
5392 cst = TREE_VALUE (args);
5393 if (TREE_CODE (cst) != INTEGER_CST)
5395 warning (OPT_Wattributes,
5396 "%qE attribute requires an integer constant argument",
5397 name);
5398 *no_add_attrs = true;
5400 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
5402 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
5403 name, REGPARM_MAX);
5404 *no_add_attrs = true;
5407 return NULL_TREE;
5410 if (TARGET_64BIT)
5412 /* Do not warn when emulating the MS ABI. */
5413 if ((TREE_CODE (*node) != FUNCTION_TYPE
5414 && TREE_CODE (*node) != METHOD_TYPE)
5415 || ix86_function_type_abi (*node) != MS_ABI)
5416 warning (OPT_Wattributes, "%qE attribute ignored",
5417 name);
5418 *no_add_attrs = true;
5419 return NULL_TREE;
5422 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5423 if (is_attribute_p ("fastcall", name))
5425 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5427 error ("fastcall and cdecl attributes are not compatible");
5429 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5431 error ("fastcall and stdcall attributes are not compatible");
5433 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
5435 error ("fastcall and regparm attributes are not compatible");
5437 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5439 error ("fastcall and thiscall attributes are not compatible");
5443 /* Can combine stdcall with fastcall (redundant), regparm and
5444 sseregparm. */
5445 else if (is_attribute_p ("stdcall", name))
5447 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5449 error ("stdcall and cdecl attributes are not compatible");
5451 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5453 error ("stdcall and fastcall attributes are not compatible");
5455 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5457 error ("stdcall and thiscall attributes are not compatible");
5461 /* Can combine cdecl with regparm and sseregparm. */
5462 else if (is_attribute_p ("cdecl", name))
5464 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5466 error ("stdcall and cdecl attributes are not compatible");
5468 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5470 error ("fastcall and cdecl attributes are not compatible");
5472 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5474 error ("cdecl and thiscall attributes are not compatible");
5477 else if (is_attribute_p ("thiscall", name))
5479 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
5480 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
5481 name);
5482 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5484 error ("stdcall and thiscall attributes are not compatible");
5486 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5488 error ("fastcall and thiscall attributes are not compatible");
5490 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5492 error ("cdecl and thiscall attributes are not compatible");
5496 /* Can combine sseregparm with all attributes. */
5498 return NULL_TREE;
5501 /* The transactional memory builtins are implicitly regparm or fastcall
5502 depending on the ABI. Override the generic do-nothing attribute that
5503 these builtins were declared with, and replace it with one of the two
5504 attributes that we expect elsewhere. */
5506 static tree
5507 ix86_handle_tm_regparm_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
5508 tree args ATTRIBUTE_UNUSED,
5509 int flags, bool *no_add_attrs)
5511 tree alt;
5513 /* In no case do we want to add the placeholder attribute. */
5514 *no_add_attrs = true;
5516 /* The 64-bit ABI is unchanged for transactional memory. */
5517 if (TARGET_64BIT)
5518 return NULL_TREE;
5520 /* ??? Is there a better way to validate 32-bit Windows? We have
5521 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
5522 if (CHECK_STACK_LIMIT > 0)
5523 alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
5524 else
5526 alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
5527 alt = tree_cons (get_identifier ("regparm"), alt, NULL);
5529 decl_attributes (node, alt, flags);
5531 return NULL_TREE;
5534 /* This function determines from TYPE the calling-convention. */
5536 unsigned int
5537 ix86_get_callcvt (const_tree type)
5539 unsigned int ret = 0;
5540 bool is_stdarg;
5541 tree attrs;
5543 if (TARGET_64BIT)
5544 return IX86_CALLCVT_CDECL;
5546 attrs = TYPE_ATTRIBUTES (type);
5547 if (attrs != NULL_TREE)
5549 if (lookup_attribute ("cdecl", attrs))
5550 ret |= IX86_CALLCVT_CDECL;
5551 else if (lookup_attribute ("stdcall", attrs))
5552 ret |= IX86_CALLCVT_STDCALL;
5553 else if (lookup_attribute ("fastcall", attrs))
5554 ret |= IX86_CALLCVT_FASTCALL;
5555 else if (lookup_attribute ("thiscall", attrs))
5556 ret |= IX86_CALLCVT_THISCALL;
5558 /* Regparm isn't allowed for thiscall and fastcall. */
5559 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
5561 if (lookup_attribute ("regparm", attrs))
5562 ret |= IX86_CALLCVT_REGPARM;
5563 if (lookup_attribute ("sseregparm", attrs))
5564 ret |= IX86_CALLCVT_SSEREGPARM;
5567 if (IX86_BASE_CALLCVT(ret) != 0)
5568 return ret;
5571 is_stdarg = stdarg_p (type);
5572 if (TARGET_RTD && !is_stdarg)
5573 return IX86_CALLCVT_STDCALL | ret;
5575 if (ret != 0
5576 || is_stdarg
5577 || TREE_CODE (type) != METHOD_TYPE
5578 || ix86_function_type_abi (type) != MS_ABI)
5579 return IX86_CALLCVT_CDECL | ret;
5581 return IX86_CALLCVT_THISCALL;
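/* Worked example (assumed declarations): in 32-bit code

     int __attribute__ ((fastcall)) f (int a, int b, int c);

   yields IX86_CALLCVT_FASTCALL, so the first two integer arguments travel
   in %ecx and %edx and the rest on the stack, while

     int __attribute__ ((regparm (3))) g (int a, int b, int c);

   yields IX86_CALLCVT_CDECL | IX86_CALLCVT_REGPARM with up to three
   arguments in %eax, %edx and %ecx.  With -mrtd, plain non-stdarg
   functions default to IX86_CALLCVT_STDCALL here.  */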
5584 /* Return 0 if the attributes for two types are incompatible, 1 if they
5585 are compatible, and 2 if they are nearly compatible (which causes a
5586 warning to be generated). */
5588 static int
5589 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5591 unsigned int ccvt1, ccvt2;
5593 if (TREE_CODE (type1) != FUNCTION_TYPE
5594 && TREE_CODE (type1) != METHOD_TYPE)
5595 return 1;
5597 ccvt1 = ix86_get_callcvt (type1);
5598 ccvt2 = ix86_get_callcvt (type2);
5599 if (ccvt1 != ccvt2)
5600 return 0;
5601 if (ix86_function_regparm (type1, NULL)
5602 != ix86_function_regparm (type2, NULL))
5603 return 0;
5605 return 1;
5608 /* Return the regparm value for a function with the indicated TYPE and DECL.
5609 DECL may be NULL when calling function indirectly
5610 or considering a libcall. */
5612 static int
5613 ix86_function_regparm (const_tree type, const_tree decl)
5615 tree attr;
5616 int regparm;
5617 unsigned int ccvt;
5619 if (TARGET_64BIT)
5620 return (ix86_function_type_abi (type) == SYSV_ABI
5621 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5622 ccvt = ix86_get_callcvt (type);
5623 regparm = ix86_regparm;
5625 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5627 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5628 if (attr)
5630 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5631 return regparm;
5634 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5635 return 2;
5636 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5637 return 1;
5639 /* Use register calling convention for local functions when possible. */
5640 if (decl
5641 && TREE_CODE (decl) == FUNCTION_DECL
5642 /* Caller and callee must agree on the calling convention, so
5643 checking just `optimize' here would mean that with
5644 __attribute__((optimize (...))) the caller could use the regparm
5645 convention and the callee not, or vice versa. Instead look at
5646 whether the callee is optimized or not. */
5647 && opt_for_fn (decl, optimize)
5648 && !(profile_flag && !flag_fentry))
5650 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5651 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
5652 if (i && i->local && i->can_change_signature)
5654 int local_regparm, globals = 0, regno;
5656 /* Make sure no regparm register is taken by a
5657 fixed register variable. */
5658 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
5659 if (fixed_regs[local_regparm])
5660 break;
5662 /* We don't want to use regparm(3) for nested functions as
5663 these use a static chain pointer in the third argument. */
5664 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
5665 local_regparm = 2;
5667 /* In 32-bit mode save a register for the split stack. */
5668 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
5669 local_regparm = 2;
5671 /* Each fixed register usage increases register pressure,
5672 so fewer registers should be used for argument passing.
5673 This functionality can be overridden by an explicit
5674 regparm value. */
5675 for (regno = AX_REG; regno <= DI_REG; regno++)
5676 if (fixed_regs[regno])
5677 globals++;
5679 local_regparm
5680 = globals < local_regparm ? local_regparm - globals : 0;
5682 if (local_regparm > regparm)
5683 regparm = local_regparm;
5687 return regparm;
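/* Behaviour sketch for the local-function case above (assumed example):
   a file-local function whose address never escapes, e.g.

     static int sum3 (int a, int b, int c) { return a + b + c; }

   compiled at -O2 for 32-bit is typically given regparm(3) here, so a, b
   and c arrive in %eax, %edx and %ecx; the count drops if a register is
   made fixed (e.g. -ffixed-ecx), if the function needs a static chain, or
   if -fsplit-stack is in use.  */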
5690 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5691 DFmode (2) arguments in SSE registers for a function with the
5692 indicated TYPE and DECL. DECL may be NULL when calling function
5693 indirectly or considering a libcall. Otherwise return 0. */
5695 static int
5696 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5698 gcc_assert (!TARGET_64BIT);
5700 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5701 by the sseregparm attribute. */
5702 if (TARGET_SSEREGPARM
5703 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5705 if (!TARGET_SSE)
5707 if (warn)
5709 if (decl)
5710 error ("calling %qD with attribute sseregparm without "
5711 "SSE/SSE2 enabled", decl);
5712 else
5713 error ("calling %qT with attribute sseregparm without "
5714 "SSE/SSE2 enabled", type);
5716 return 0;
5719 return 2;
5722 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5723 (and DFmode for SSE2) arguments in SSE registers. */
5724 if (decl && TARGET_SSE_MATH && optimize
5725 && !(profile_flag && !flag_fentry))
5727 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5728 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
5729 if (i && i->local && i->can_change_signature)
5730 return TARGET_SSE2 ? 2 : 1;
5733 return 0;
5736 /* Return true if EAX is live at the start of the function. Used by
5737 ix86_expand_prologue to determine if we need special help before
5738 calling allocate_stack_worker. */
5740 static bool
5741 ix86_eax_live_at_start_p (void)
5743 /* Cheat. Don't bother working forward from ix86_function_regparm
5744 to the function type to whether an actual argument is located in
5745 eax. Instead just look at cfg info, which is still close enough
5746 to correct at this point. This gives false positives for broken
5747 functions that might use uninitialized data that happens to be
5748 allocated in eax, but who cares? */
5749 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
5752 static bool
5753 ix86_keep_aggregate_return_pointer (tree fntype)
5755 tree attr;
5757 if (!TARGET_64BIT)
5759 attr = lookup_attribute ("callee_pop_aggregate_return",
5760 TYPE_ATTRIBUTES (fntype));
5761 if (attr)
5762 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
5764 /* For 32-bit MS-ABI the default is to keep aggregate
5765 return pointer. */
5766 if (ix86_function_type_abi (fntype) == MS_ABI)
5767 return true;
5769 return KEEP_AGGREGATE_RETURN_POINTER != 0;
5772 /* Value is the number of bytes of arguments automatically
5773 popped when returning from a subroutine call.
5774 FUNDECL is the declaration node of the function (as a tree),
5775 FUNTYPE is the data type of the function (as a tree),
5776 or for a library call it is an identifier node for the subroutine name.
5777 SIZE is the number of bytes of arguments passed on the stack.
5779 On the 80386, the RTD insn may be used to pop them if the number
5780 of args is fixed, but if the number is variable then the caller
5781 must pop them all. RTD can't be used for library calls now
5782 because the library is compiled with the Unix compiler.
5783 Use of RTD is a selectable option, since it is incompatible with
5784 standard Unix calling sequences. If the option is not selected,
5785 the caller must always pop the args.
5787 The attribute stdcall is equivalent to RTD on a per module basis. */
5789 static int
5790 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5792 unsigned int ccvt;
5794 /* None of the 64-bit ABIs pop arguments. */
5795 if (TARGET_64BIT)
5796 return 0;
5798 ccvt = ix86_get_callcvt (funtype);
5800 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
5801 | IX86_CALLCVT_THISCALL)) != 0
5802 && ! stdarg_p (funtype))
5803 return size;
5805 /* Lose any fake structure return argument if it is passed on the stack. */
5806 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5807 && !ix86_keep_aggregate_return_pointer (funtype))
5809 int nregs = ix86_function_regparm (funtype, fundecl);
5810 if (nregs == 0)
5811 return GET_MODE_SIZE (Pmode);
5814 return 0;
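/* Concrete example (assumed): for 32-bit code

     void __attribute__ ((stdcall)) cb (int a, int b, int c);

   SIZE is 12 and the function is not stdarg, so 12 is returned and the
   callee pops its own arguments with "ret $12"; a plain cdecl function
   returns 0 here and the caller does the cleanup.  */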
5817 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
5819 static bool
5820 ix86_legitimate_combined_insn (rtx insn)
5822 /* Check operand constraints in case hard registers were propagated
5823 into insn pattern. This check prevents combine pass from
5824 generating insn patterns with invalid hard register operands.
5825 These invalid insns can eventually confuse reload to error out
5826 with a spill failure. See also PRs 46829 and 46843. */
5827 if ((INSN_CODE (insn) = recog (PATTERN (insn), insn, 0)) >= 0)
5829 int i;
5831 extract_insn (insn);
5832 preprocess_constraints ();
5834 for (i = 0; i < recog_data.n_operands; i++)
5836 rtx op = recog_data.operand[i];
5837 enum machine_mode mode = GET_MODE (op);
5838 struct operand_alternative *op_alt;
5839 int offset = 0;
5840 bool win;
5841 int j;
5843 /* For pre-AVX disallow unaligned loads/stores where the
5844 instructions don't support it. */
5845 if (!TARGET_AVX
5846 && VECTOR_MODE_P (GET_MODE (op))
5847 && misaligned_operand (op, GET_MODE (op)))
5849 int min_align = get_attr_ssememalign (insn);
5850 if (min_align == 0)
5851 return false;
5854 /* A unary operator may be accepted by the predicate, but it
5855 is irrelevant for matching constraints. */
5856 if (UNARY_P (op))
5857 op = XEXP (op, 0);
5859 if (GET_CODE (op) == SUBREG)
5861 if (REG_P (SUBREG_REG (op))
5862 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
5863 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
5864 GET_MODE (SUBREG_REG (op)),
5865 SUBREG_BYTE (op),
5866 GET_MODE (op));
5867 op = SUBREG_REG (op);
5870 if (!(REG_P (op) && HARD_REGISTER_P (op)))
5871 continue;
5873 op_alt = recog_op_alt[i];
5875 /* Operand has no constraints, anything is OK. */
5876 win = !recog_data.n_alternatives;
5878 for (j = 0; j < recog_data.n_alternatives; j++)
5880 if (op_alt[j].anything_ok
5881 || (op_alt[j].matches != -1
5882 && operands_match_p
5883 (recog_data.operand[i],
5884 recog_data.operand[op_alt[j].matches]))
5885 || reg_fits_class_p (op, op_alt[j].cl, offset, mode))
5887 win = true;
5888 break;
5892 if (!win)
5893 return false;
5897 return true;
5900 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
5902 static unsigned HOST_WIDE_INT
5903 ix86_asan_shadow_offset (void)
5905 return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
5906 : HOST_WIDE_INT_C (0x7fff8000))
5907 : (HOST_WIDE_INT_1 << 29);
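/* The offsets above plug into the usual AddressSanitizer mapping, roughly
   shadow = (addr >> 3) + offset; e.g. on 64-bit Linux the shadow byte for
   address A lives at (A >> 3) + 0x7fff8000.  */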
5910 /* Argument support functions. */
5912 /* Return true when register may be used to pass function parameters. */
5913 bool
5914 ix86_function_arg_regno_p (int regno)
5916 int i;
5917 const int *parm_regs;
5919 if (!TARGET_64BIT)
5921 if (TARGET_MACHO)
5922 return (regno < REGPARM_MAX
5923 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5924 else
5925 return (regno < REGPARM_MAX
5926 || (TARGET_MMX && MMX_REGNO_P (regno)
5927 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5928 || (TARGET_SSE && SSE_REGNO_P (regno)
5929 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5932 if (TARGET_SSE && SSE_REGNO_P (regno)
5933 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
5934 return true;
5936 /* TODO: The function should depend on current function ABI but
5937 builtins.c would need updating then. Therefore we use the
5938 default ABI. */
5940 /* RAX is used as hidden argument to va_arg functions. */
5941 if (ix86_abi == SYSV_ABI && regno == AX_REG)
5942 return true;
5944 if (ix86_abi == MS_ABI)
5945 parm_regs = x86_64_ms_abi_int_parameter_registers;
5946 else
5947 parm_regs = x86_64_int_parameter_registers;
5948 for (i = 0; i < (ix86_abi == MS_ABI
5949 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5950 if (regno == parm_regs[i])
5951 return true;
5952 return false;
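/* For reference (derived from the tables used above): in 64-bit code the
   SYSV ABI passes integer arguments in %rdi, %rsi, %rdx, %rcx, %r8 and %r9
   (with %rax carrying the number of vector registers used for a varargs
   call), while the MS ABI uses only %rcx, %rdx, %r8 and %r9; this is why
   the loop checks a different register count per ABI.  */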
5957 /* Return true if we do not know how to pass TYPE solely in registers. */
5957 static bool
5958 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
5960 if (must_pass_in_stack_var_size_or_pad (mode, type))
5961 return true;
5963 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5964 The layout_type routine is crafty and tries to trick us into passing
5965 currently unsupported vector types on the stack by using TImode. */
5966 return (!TARGET_64BIT && mode == TImode
5967 && type && TREE_CODE (type) != VECTOR_TYPE);
5970 /* Return the size, in bytes, of the area reserved for arguments passed
5971 in registers for the function represented by FNDECL, depending on the
5972 ABI format used. */
5973 int
5974 ix86_reg_parm_stack_space (const_tree fndecl)
5976 enum calling_abi call_abi = SYSV_ABI;
5977 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
5978 call_abi = ix86_function_abi (fndecl);
5979 else
5980 call_abi = ix86_function_type_abi (fndecl);
5981 if (TARGET_64BIT && call_abi == MS_ABI)
5982 return 32;
5983 return 0;
5986 /* Return SYSV_ABI or MS_ABI, depending on FNTYPE, specifying the
5987 call ABI used. */
5988 enum calling_abi
5989 ix86_function_type_abi (const_tree fntype)
5991 if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
5993 enum calling_abi abi = ix86_abi;
5994 if (abi == SYSV_ABI)
5996 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
5997 abi = MS_ABI;
5999 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
6000 abi = SYSV_ABI;
6001 return abi;
6003 return ix86_abi;
6006 /* We add this as a workaround in order to use libc_has_function
6007 hook in i386.md. */
6008 bool
6009 ix86_libc_has_function (enum function_class fn_class)
6011 return targetm.libc_has_function (fn_class);
6014 static bool
6015 ix86_function_ms_hook_prologue (const_tree fn)
6017 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
6019 if (decl_function_context (fn) != NULL_TREE)
6020 error_at (DECL_SOURCE_LOCATION (fn),
6021 "ms_hook_prologue is not compatible with nested function");
6022 else
6023 return true;
6025 return false;
6028 static enum calling_abi
6029 ix86_function_abi (const_tree fndecl)
6031 if (! fndecl)
6032 return ix86_abi;
6033 return ix86_function_type_abi (TREE_TYPE (fndecl));
6036 /* Return SYSV_ABI or MS_ABI, depending on CFUN, specifying the
6037 call ABI used. */
6038 enum calling_abi
6039 ix86_cfun_abi (void)
6041 if (! cfun)
6042 return ix86_abi;
6043 return cfun->machine->call_abi;
6046 /* Write the extra assembler code needed to declare a function properly. */
6048 void
6049 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
6050 tree decl)
6052 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
6054 if (is_ms_hook)
6056 int i, filler_count = (TARGET_64BIT ? 32 : 16);
6057 unsigned int filler_cc = 0xcccccccc;
6059 for (i = 0; i < filler_count; i += 4)
6060 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
6063 #ifdef SUBTARGET_ASM_UNWIND_INIT
6064 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
6065 #endif
6067 ASM_OUTPUT_LABEL (asm_out_file, fname);
6069 /* Output magic byte marker, if hot-patch attribute is set. */
6070 if (is_ms_hook)
6072 if (TARGET_64BIT)
6074 /* leaq [%rsp + 0], %rsp */
6075 asm_fprintf (asm_out_file, ASM_BYTE
6076 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
6078 else
6080 /* movl.s %edi, %edi
6081 push %ebp
6082 movl.s %esp, %ebp */
6083 asm_fprintf (asm_out_file, ASM_BYTE
6084 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
6089 /* regclass.c */
6090 extern void init_regs (void);
6092 /* Implementation of the call ABI switching target hook. The call
6093 register sets specific to FNDECL are set up. See also
6094 ix86_conditional_register_usage for more details. */
6095 void
6096 ix86_call_abi_override (const_tree fndecl)
6098 if (fndecl == NULL_TREE)
6099 cfun->machine->call_abi = ix86_abi;
6100 else
6101 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
6104 /* The 64-bit MS and SYSV ABIs have different sets of call-used registers.
6105 Avoid expensive re-initialization of init_regs each time we switch function
6106 context, since it is needed only during RTL expansion. */
6107 static void
6108 ix86_maybe_switch_abi (void)
6110 if (TARGET_64BIT &&
6111 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
6112 reinit_regs ();
6115 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6116 for a call to a function whose data type is FNTYPE.
6117 For a library call, FNTYPE is 0. */
6119 void
6120 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
6121 tree fntype, /* tree ptr for function decl */
6122 rtx libname, /* SYMBOL_REF of library name or 0 */
6123 tree fndecl,
6124 int caller)
6126 struct cgraph_local_info *i;
6128 memset (cum, 0, sizeof (*cum));
6130 if (fndecl)
6132 i = cgraph_local_info (fndecl);
6133 cum->call_abi = ix86_function_abi (fndecl);
6135 else
6137 i = NULL;
6138 cum->call_abi = ix86_function_type_abi (fntype);
6141 cum->caller = caller;
6143 /* Set up the number of registers to use for passing arguments. */
6144 cum->nregs = ix86_regparm;
6145 if (TARGET_64BIT)
6147 cum->nregs = (cum->call_abi == SYSV_ABI
6148 ? X86_64_REGPARM_MAX
6149 : X86_64_MS_REGPARM_MAX);
6151 if (TARGET_SSE)
6153 cum->sse_nregs = SSE_REGPARM_MAX;
6154 if (TARGET_64BIT)
6156 cum->sse_nregs = (cum->call_abi == SYSV_ABI
6157 ? X86_64_SSE_REGPARM_MAX
6158 : X86_64_MS_SSE_REGPARM_MAX);
6161 if (TARGET_MMX)
6162 cum->mmx_nregs = MMX_REGPARM_MAX;
6163 cum->warn_avx512f = true;
6164 cum->warn_avx = true;
6165 cum->warn_sse = true;
6166 cum->warn_mmx = true;
6168 /* Because the type might mismatch between caller and callee, we need to
6169 use the actual type of the function for local calls.
6170 FIXME: cgraph_analyze can be told to actually record if a function uses
6171 va_start, so for local functions maybe_vaarg can be made more aggressive,
6172 helping K&R code.
6173 FIXME: once the type system is fixed, we won't need this code anymore. */
6174 if (i && i->local && i->can_change_signature)
6175 fntype = TREE_TYPE (fndecl);
6176 cum->stdarg = fntype ? stdarg_p (fntype) : false;
6177 cum->maybe_vaarg = (fntype
6178 ? (!prototype_p (fntype) || stdarg_p (fntype))
6179 : !libname);
6181 cum->bnd_regno = FIRST_BND_REG;
6182 cum->bnds_in_bt = 0;
6183 cum->force_bnd_pass = 0;
6185 if (!TARGET_64BIT)
6187 /* If there are variable arguments, then we won't pass anything
6188 in registers in 32-bit mode. */
6189 if (stdarg_p (fntype))
6191 cum->nregs = 0;
6192 cum->sse_nregs = 0;
6193 cum->mmx_nregs = 0;
6194 cum->warn_avx512f = false;
6195 cum->warn_avx = false;
6196 cum->warn_sse = false;
6197 cum->warn_mmx = false;
6198 return;
6201 /* Use ecx and edx registers if function has fastcall attribute,
6202 else look for regparm information. */
6203 if (fntype)
6205 unsigned int ccvt = ix86_get_callcvt (fntype);
6206 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
6208 cum->nregs = 1;
6209 cum->fastcall = 1; /* Same first register as in fastcall. */
6211 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
6213 cum->nregs = 2;
6214 cum->fastcall = 1;
6216 else
6217 cum->nregs = ix86_function_regparm (fntype, fndecl);
6220 /* Set up the number of SSE registers used for passing SFmode
6221 and DFmode arguments. Warn for mismatching ABI. */
6222 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
6226 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
6227 But in the case of vector types, it is some vector mode.
6229 When we have only some of our vector isa extensions enabled, then there
6230 are some modes for which vector_mode_supported_p is false. For these
6231 modes, the generic vector support in gcc will choose some non-vector mode
6232 in order to implement the type. By computing the natural mode, we'll
6233 select the proper ABI location for the operand and not depend on whatever
6234 the middle-end decides to do with these vector types.
6236 The middle-end can't deal with vector types > 16 bytes. In this
6237 case, we return the original mode and warn of the ABI change if CUM isn't
6238 NULL.
6240 If IN_RETURN is true, warn of the ABI change if the vector mode isn't
6241 available for the function return value. */
6243 static enum machine_mode
6244 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
6245 bool in_return)
6247 enum machine_mode mode = TYPE_MODE (type);
6249 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
6251 HOST_WIDE_INT size = int_size_in_bytes (type);
6252 if ((size == 8 || size == 16 || size == 32 || size == 64)
6253 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
6254 && TYPE_VECTOR_SUBPARTS (type) > 1)
6256 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
6258 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6259 mode = MIN_MODE_VECTOR_FLOAT;
6260 else
6261 mode = MIN_MODE_VECTOR_INT;
6263 /* Get the mode which has this inner mode and number of units. */
6264 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
6265 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
6266 && GET_MODE_INNER (mode) == innermode)
6268 if (size == 64 && !TARGET_AVX512F)
6270 static bool warnedavx512f;
6271 static bool warnedavx512f_ret;
6273 if (cum && cum->warn_avx512f && !warnedavx512f)
6275 if (warning (OPT_Wpsabi, "AVX512F vector argument "
6276 "without AVX512F enabled changes the ABI"))
6277 warnedavx512f = true;
6279 else if (in_return && !warnedavx512f_ret)
6281 if (warning (OPT_Wpsabi, "AVX512F vector return "
6282 "without AVX512F enabled changes the ABI"))
6283 warnedavx512f_ret = true;
6286 return TYPE_MODE (type);
6288 else if (size == 32 && !TARGET_AVX)
6290 static bool warnedavx;
6291 static bool warnedavx_ret;
6293 if (cum && cum->warn_avx && !warnedavx)
6295 if (warning (OPT_Wpsabi, "AVX vector argument "
6296 "without AVX enabled changes the ABI"))
6297 warnedavx = true;
6299 else if (in_return && !warnedavx_ret)
6301 if (warning (OPT_Wpsabi, "AVX vector return "
6302 "without AVX enabled changes the ABI"))
6303 warnedavx_ret = true;
6306 return TYPE_MODE (type);
6308 else if (((size == 8 && TARGET_64BIT) || size == 16)
6309 && !TARGET_SSE)
6311 static bool warnedsse;
6312 static bool warnedsse_ret;
6314 if (cum && cum->warn_sse && !warnedsse)
6316 if (warning (OPT_Wpsabi, "SSE vector argument "
6317 "without SSE enabled changes the ABI"))
6318 warnedsse = true;
6320 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
6322 if (warning (OPT_Wpsabi, "SSE vector return "
6323 "without SSE enabled changes the ABI"))
6324 warnedsse_ret = true;
6327 else if ((size == 8 && !TARGET_64BIT) && !TARGET_MMX)
6329 static bool warnedmmx;
6330 static bool warnedmmx_ret;
6332 if (cum && cum->warn_mmx && !warnedmmx)
6334 if (warning (OPT_Wpsabi, "MMX vector argument "
6335 "without MMX enabled changes the ABI"))
6336 warnedmmx = true;
6338 else if (in_return && !warnedmmx_ret)
6340 if (warning (OPT_Wpsabi, "MMX vector return "
6341 "without MMX enabled changes the ABI"))
6342 warnedmmx_ret = true;
6345 return mode;
6348 gcc_unreachable ();
6352 return mode;
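/* Example of the warning paths above (assumed code): passing an __m256
   argument to a function compiled without -mavx, e.g.

     void take (__m256 v);     built with plain -msse2

   triggers "AVX vector argument without AVX enabled changes the ABI"
   (controllable with -Wpsabi), and the argument is passed using the type's
   original non-vector mode, i.e. differently from an AVX-enabled build.  */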
6355 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
6356 this may not agree with the mode that the type system has chosen for the
6357 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
6358 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
6360 static rtx
6361 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
6362 unsigned int regno)
6364 rtx tmp;
6366 if (orig_mode != BLKmode)
6367 tmp = gen_rtx_REG (orig_mode, regno);
6368 else
6370 tmp = gen_rtx_REG (mode, regno);
6371 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
6372 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
6375 return tmp;
6378 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
6379 of this code is to classify each 8bytes of incoming argument by the register
6380 class and assign registers accordingly. */
6382 /* Return the union class of CLASS1 and CLASS2.
6383 See the x86-64 PS ABI for details. */
6385 static enum x86_64_reg_class
6386 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
6388 /* Rule #1: If both classes are equal, this is the resulting class. */
6389 if (class1 == class2)
6390 return class1;
6392 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
6393 the other class. */
6394 if (class1 == X86_64_NO_CLASS)
6395 return class2;
6396 if (class2 == X86_64_NO_CLASS)
6397 return class1;
6399 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
6400 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
6401 return X86_64_MEMORY_CLASS;
6403 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
6404 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
6405 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
6406 return X86_64_INTEGERSI_CLASS;
6407 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
6408 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
6409 return X86_64_INTEGER_CLASS;
6411 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
6412 MEMORY is used. */
6413 if (class1 == X86_64_X87_CLASS
6414 || class1 == X86_64_X87UP_CLASS
6415 || class1 == X86_64_COMPLEX_X87_CLASS
6416 || class2 == X86_64_X87_CLASS
6417 || class2 == X86_64_X87UP_CLASS
6418 || class2 == X86_64_COMPLEX_X87_CLASS)
6419 return X86_64_MEMORY_CLASS;
6421 /* Rule #6: Otherwise class SSE is used. */
6422 return X86_64_SSE_CLASS;
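/* A short worked example of the merge rules (assumed types):

     union u { int i; float f; };

   classifies its single eightbyte as X86_64_INTEGERSI_CLASS for the int
   member and X86_64_SSESF_CLASS for the float member; rule #4 merges these
   to X86_64_INTEGERSI_CLASS, so the union is passed in a general-purpose
   register.  For struct { double d; long l; } the two eightbytes classify
   as SSE and INTEGER respectively, so the struct goes in one SSE register
   and one general register.  */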
6425 /* Classify the argument of type TYPE and mode MODE.
6426 CLASSES will be filled by the register class used to pass each word
6427 of the operand. The number of words is returned. In case the parameter
6428 should be passed in memory, 0 is returned. As a special case for zero
6429 sized containers, classes[0] will be NO_CLASS and 1 is returned.
6431 BIT_OFFSET is used internally for handling records and specifies the
6432 offset into the record, in bits modulo 512, to avoid overflow cases.
6434 See the x86-64 PS ABI for details.
6437 static int
6438 classify_argument (enum machine_mode mode, const_tree type,
6439 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
6441 HOST_WIDE_INT bytes =
6442 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6443 int words
6444 = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6446 /* Variable sized entities are always passed/returned in memory. */
6447 if (bytes < 0)
6448 return 0;
6450 if (mode != VOIDmode
6451 && targetm.calls.must_pass_in_stack (mode, type))
6452 return 0;
6454 if (type && AGGREGATE_TYPE_P (type))
6456 int i;
6457 tree field;
6458 enum x86_64_reg_class subclasses[MAX_CLASSES];
6460 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
6461 if (bytes > 64)
6462 return 0;
6464 for (i = 0; i < words; i++)
6465 classes[i] = X86_64_NO_CLASS;
6467 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
6468 signal the memory class, so handle it as a special case. */
6469 if (!words)
6471 classes[0] = X86_64_NO_CLASS;
6472 return 1;
6475 /* Classify each field of the record and merge the classes. */
6476 switch (TREE_CODE (type))
6478 case RECORD_TYPE:
6479 /* And now merge the fields of structure. */
6480 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6482 if (TREE_CODE (field) == FIELD_DECL)
6484 int num;
6486 if (TREE_TYPE (field) == error_mark_node)
6487 continue;
6489 /* Bitfields are always classified as integer. Handle them
6490 early, since later code would consider them to be
6491 misaligned integers. */
6492 if (DECL_BIT_FIELD (field))
6494 for (i = (int_bit_position (field)
6495 + (bit_offset % 64)) / 8 / 8;
6496 i < ((int_bit_position (field) + (bit_offset % 64))
6497 + tree_to_shwi (DECL_SIZE (field))
6498 + 63) / 8 / 8; i++)
6499 classes[i] =
6500 merge_classes (X86_64_INTEGER_CLASS,
6501 classes[i]);
6503 else
6505 int pos;
6507 type = TREE_TYPE (field);
6509 /* Flexible array member is ignored. */
6510 if (TYPE_MODE (type) == BLKmode
6511 && TREE_CODE (type) == ARRAY_TYPE
6512 && TYPE_SIZE (type) == NULL_TREE
6513 && TYPE_DOMAIN (type) != NULL_TREE
6514 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
6515 == NULL_TREE))
6517 static bool warned;
6519 if (!warned && warn_psabi)
6521 warned = true;
6522 inform (input_location,
6523 "the ABI of passing struct with"
6524 " a flexible array member has"
6525 " changed in GCC 4.4");
6527 continue;
6529 num = classify_argument (TYPE_MODE (type), type,
6530 subclasses,
6531 (int_bit_position (field)
6532 + bit_offset) % 512);
6533 if (!num)
6534 return 0;
6535 pos = (int_bit_position (field)
6536 + (bit_offset % 64)) / 8 / 8;
6537 for (i = 0; i < num && (i + pos) < words; i++)
6538 classes[i + pos] =
6539 merge_classes (subclasses[i], classes[i + pos]);
6543 break;
6545 case ARRAY_TYPE:
6546 /* Arrays are handled as small records. */
6548 int num;
6549 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
6550 TREE_TYPE (type), subclasses, bit_offset);
6551 if (!num)
6552 return 0;
6554 /* The partial classes are now full classes. */
6555 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
6556 subclasses[0] = X86_64_SSE_CLASS;
6557 if (subclasses[0] == X86_64_INTEGERSI_CLASS
6558 && !((bit_offset % 64) == 0 && bytes == 4))
6559 subclasses[0] = X86_64_INTEGER_CLASS;
6561 for (i = 0; i < words; i++)
6562 classes[i] = subclasses[i % num];
6564 break;
6566 case UNION_TYPE:
6567 case QUAL_UNION_TYPE:
6568 /* Unions are similar to RECORD_TYPE but the offset is always 0.  */
6570 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6572 if (TREE_CODE (field) == FIELD_DECL)
6574 int num;
6576 if (TREE_TYPE (field) == error_mark_node)
6577 continue;
6579 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
6580 TREE_TYPE (field), subclasses,
6581 bit_offset);
6582 if (!num)
6583 return 0;
6584 for (i = 0; i < num; i++)
6585 classes[i] = merge_classes (subclasses[i], classes[i]);
6588 break;
6590 default:
6591 gcc_unreachable ();
6594 if (words > 2)
6596 /* When the size is larger than 16 bytes, everything is passed in
6597 memory unless the first eightbyte is classified X86_64_SSE_CLASS
6598 and all the remaining eightbytes are classified
6599 X86_64_SSEUP_CLASS. */
6600 if (classes[0] != X86_64_SSE_CLASS)
6601 return 0;
6603 for (i = 1; i < words; i++)
6604 if (classes[i] != X86_64_SSEUP_CLASS)
6605 return 0;
6608 /* Final merger cleanup. */
6609 for (i = 0; i < words; i++)
6611 /* If one class is MEMORY, everything should be passed in
6612 memory. */
6613 if (classes[i] == X86_64_MEMORY_CLASS)
6614 return 0;
6616 /* X86_64_SSEUP_CLASS should always be preceded by
6617 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6618 if (classes[i] == X86_64_SSEUP_CLASS
6619 && classes[i - 1] != X86_64_SSE_CLASS
6620 && classes[i - 1] != X86_64_SSEUP_CLASS)
6622 /* The first one should never be X86_64_SSEUP_CLASS. */
6623 gcc_assert (i != 0);
6624 classes[i] = X86_64_SSE_CLASS;
6627 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6628 everything should be passed in memory. */
6629 if (classes[i] == X86_64_X87UP_CLASS
6630 && (classes[i - 1] != X86_64_X87_CLASS))
6632 static bool warned;
6634 /* The first one should never be X86_64_X87UP_CLASS. */
6635 gcc_assert (i != 0);
6636 if (!warned && warn_psabi)
6638 warned = true;
6639 inform (input_location,
6640 "the ABI of passing union with long double"
6641 " has changed in GCC 4.4");
6643 return 0;
6646 return words;
6649 /* Compute the alignment needed. We align all types to their natural
6650 boundaries, with the exception of XFmode which is aligned to 64 bits. */
6651 if (mode != VOIDmode && mode != BLKmode)
6653 int mode_alignment = GET_MODE_BITSIZE (mode);
6655 if (mode == XFmode)
6656 mode_alignment = 128;
6657 else if (mode == XCmode)
6658 mode_alignment = 256;
6659 if (COMPLEX_MODE_P (mode))
6660 mode_alignment /= 2;
6661 /* Misaligned fields are always returned in memory. */
6662 if (bit_offset % mode_alignment)
6663 return 0;
6666 /* For V1xx modes, just use the base mode. */
6667 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
6668 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
6669 mode = GET_MODE_INNER (mode);
6671 /* Classification of atomic types. */
6672 switch (mode)
6674 case SDmode:
6675 case DDmode:
6676 classes[0] = X86_64_SSE_CLASS;
6677 return 1;
6678 case TDmode:
6679 classes[0] = X86_64_SSE_CLASS;
6680 classes[1] = X86_64_SSEUP_CLASS;
6681 return 2;
6682 case DImode:
6683 case SImode:
6684 case HImode:
6685 case QImode:
6686 case CSImode:
6687 case CHImode:
6688 case CQImode:
6690 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
6692 /* Analyze last 128 bits only. */
6693 size = (size - 1) & 0x7f;
6695 if (size < 32)
6697 classes[0] = X86_64_INTEGERSI_CLASS;
6698 return 1;
6700 else if (size < 64)
6702 classes[0] = X86_64_INTEGER_CLASS;
6703 return 1;
6705 else if (size < 64+32)
6707 classes[0] = X86_64_INTEGER_CLASS;
6708 classes[1] = X86_64_INTEGERSI_CLASS;
6709 return 2;
6711 else if (size < 64+64)
6713 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6714 return 2;
6716 else
6717 gcc_unreachable ();
6719 case CDImode:
6720 case TImode:
6721 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6722 return 2;
6723 case COImode:
6724 case OImode:
6725 /* OImode shouldn't be used directly. */
6726 gcc_unreachable ();
6727 case CTImode:
6728 return 0;
6729 case SFmode:
6730 if (!(bit_offset % 64))
6731 classes[0] = X86_64_SSESF_CLASS;
6732 else
6733 classes[0] = X86_64_SSE_CLASS;
6734 return 1;
6735 case DFmode:
6736 classes[0] = X86_64_SSEDF_CLASS;
6737 return 1;
6738 case XFmode:
6739 classes[0] = X86_64_X87_CLASS;
6740 classes[1] = X86_64_X87UP_CLASS;
6741 return 2;
6742 case TFmode:
6743 classes[0] = X86_64_SSE_CLASS;
6744 classes[1] = X86_64_SSEUP_CLASS;
6745 return 2;
6746 case SCmode:
6747 classes[0] = X86_64_SSE_CLASS;
6748 if (!(bit_offset % 64))
6749 return 1;
6750 else
6752 static bool warned;
6754 if (!warned && warn_psabi)
6756 warned = true;
6757 inform (input_location,
6758 "the ABI of passing structure with complex float"
6759 " member has changed in GCC 4.4");
6761 classes[1] = X86_64_SSESF_CLASS;
6762 return 2;
6764 case DCmode:
6765 classes[0] = X86_64_SSEDF_CLASS;
6766 classes[1] = X86_64_SSEDF_CLASS;
6767 return 2;
6768 case XCmode:
6769 classes[0] = X86_64_COMPLEX_X87_CLASS;
6770 return 1;
6771 case TCmode:
6772 /* This mode is larger than 16 bytes. */
6773 return 0;
6774 case V8SFmode:
6775 case V8SImode:
6776 case V32QImode:
6777 case V16HImode:
6778 case V4DFmode:
6779 case V4DImode:
6780 classes[0] = X86_64_SSE_CLASS;
6781 classes[1] = X86_64_SSEUP_CLASS;
6782 classes[2] = X86_64_SSEUP_CLASS;
6783 classes[3] = X86_64_SSEUP_CLASS;
6784 return 4;
6785 case V8DFmode:
6786 case V16SFmode:
6787 case V8DImode:
6788 case V16SImode:
6789 case V32HImode:
6790 case V64QImode:
6791 classes[0] = X86_64_SSE_CLASS;
6792 classes[1] = X86_64_SSEUP_CLASS;
6793 classes[2] = X86_64_SSEUP_CLASS;
6794 classes[3] = X86_64_SSEUP_CLASS;
6795 classes[4] = X86_64_SSEUP_CLASS;
6796 classes[5] = X86_64_SSEUP_CLASS;
6797 classes[6] = X86_64_SSEUP_CLASS;
6798 classes[7] = X86_64_SSEUP_CLASS;
6799 return 8;
6800 case V4SFmode:
6801 case V4SImode:
6802 case V16QImode:
6803 case V8HImode:
6804 case V2DFmode:
6805 case V2DImode:
6806 classes[0] = X86_64_SSE_CLASS;
6807 classes[1] = X86_64_SSEUP_CLASS;
6808 return 2;
6809 case V1TImode:
6810 case V1DImode:
6811 case V2SFmode:
6812 case V2SImode:
6813 case V4HImode:
6814 case V8QImode:
6815 classes[0] = X86_64_SSE_CLASS;
6816 return 1;
6817 case BLKmode:
6818 case VOIDmode:
6819 return 0;
6820 default:
6821 gcc_assert (VECTOR_MODE_P (mode));
6823 if (bytes > 16)
6824 return 0;
6826 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
6828 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
6829 classes[0] = X86_64_INTEGERSI_CLASS;
6830 else
6831 classes[0] = X86_64_INTEGER_CLASS;
6832 classes[1] = X86_64_INTEGER_CLASS;
6833 return 1 + (bytes > 8);
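/* Illustrative sketch, not part of the original source: classification of a
   simple 16-byte aggregate under the rules above, using hypothetical names.  */
#if 0
struct example_pair { double d; long l; };
/* classify_argument yields two eightbytes: classes[0] = X86_64_SSEDF_CLASS
   (the double) and classes[1] = X86_64_INTEGER_CLASS (the long), so the
   struct travels in one SSE register and one integer register.  A 16-byte
   struct of four ints would instead yield two INTEGER eightbytes.  */
extern void example_callee (struct example_pair);
#endif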
6837 /* Examine the argument and return set number of register required in each
6838 class. Return 0 iff parameter should be passed in memory. */
6839 static int
6840 examine_argument (enum machine_mode mode, const_tree type, int in_return,
6841 int *int_nregs, int *sse_nregs)
6843 enum x86_64_reg_class regclass[MAX_CLASSES];
6844 int n = classify_argument (mode, type, regclass, 0);
6846 *int_nregs = 0;
6847 *sse_nregs = 0;
6848 if (!n)
6849 return 0;
6850 for (n--; n >= 0; n--)
6851 switch (regclass[n])
6853 case X86_64_INTEGER_CLASS:
6854 case X86_64_INTEGERSI_CLASS:
6855 (*int_nregs)++;
6856 break;
6857 case X86_64_SSE_CLASS:
6858 case X86_64_SSESF_CLASS:
6859 case X86_64_SSEDF_CLASS:
6860 (*sse_nregs)++;
6861 break;
6862 case X86_64_NO_CLASS:
6863 case X86_64_SSEUP_CLASS:
6864 break;
6865 case X86_64_X87_CLASS:
6866 case X86_64_X87UP_CLASS:
6867 if (!in_return)
6868 return 0;
6869 break;
6870 case X86_64_COMPLEX_X87_CLASS:
6871 return in_return ? 2 : 0;
6872 case X86_64_MEMORY_CLASS:
6873 gcc_unreachable ();
6875 return 1;
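/* Illustrative sketch, not part of the original source: register counts
   produced by examine_argument for two hypothetical aggregates.  */
#if 0
struct example_mixed { double d; long l; };	/* 16 bytes */
struct example_three { long a, b, c; };		/* 24 bytes */
/* For example_mixed, examine_argument sets *int_nregs = 1 and
   *sse_nregs = 1 and returns nonzero.  For example_three it returns 0:
   aggregates larger than 16 bytes that are not pure SSE vectors are
   classified to memory and therefore passed on the stack.  */
#endif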
6878 /* Construct container for the argument used by GCC interface. See
6879 FUNCTION_ARG for the detailed description. */
6881 static rtx
6882 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
6883 const_tree type, int in_return, int nintregs, int nsseregs,
6884 const int *intreg, int sse_regno)
6886 /* The following variables hold the static issued_error state. */
6887 static bool issued_sse_arg_error;
6888 static bool issued_sse_ret_error;
6889 static bool issued_x87_ret_error;
6891 enum machine_mode tmpmode;
6892 int bytes =
6893 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6894 enum x86_64_reg_class regclass[MAX_CLASSES];
6895 int n;
6896 int i;
6897 int nexps = 0;
6898 int needed_sseregs, needed_intregs;
6899 rtx exp[MAX_CLASSES];
6900 rtx ret;
6902 n = classify_argument (mode, type, regclass, 0);
6903 if (!n)
6904 return NULL;
6905 if (!examine_argument (mode, type, in_return, &needed_intregs,
6906 &needed_sseregs))
6907 return NULL;
6908 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
6909 return NULL;
6911 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6912 some less clueful developer tries to use floating-point anyway. */
6913 if (needed_sseregs && !TARGET_SSE)
6915 if (in_return)
6917 if (!issued_sse_ret_error)
6919 error ("SSE register return with SSE disabled");
6920 issued_sse_ret_error = true;
6923 else if (!issued_sse_arg_error)
6925 error ("SSE register argument with SSE disabled");
6926 issued_sse_arg_error = true;
6928 return NULL;
6931 /* Likewise, error if the ABI requires us to return values in the
6932 x87 registers and the user specified -mno-80387. */
6933 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
6934 for (i = 0; i < n; i++)
6935 if (regclass[i] == X86_64_X87_CLASS
6936 || regclass[i] == X86_64_X87UP_CLASS
6937 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
6939 if (!issued_x87_ret_error)
6941 error ("x87 register return with x87 disabled");
6942 issued_x87_ret_error = true;
6944 return NULL;
6947 /* First construct simple cases. Avoid SCmode, since we want to use
6948 a single register to pass this type. */
6949 if (n == 1 && mode != SCmode)
6950 switch (regclass[0])
6952 case X86_64_INTEGER_CLASS:
6953 case X86_64_INTEGERSI_CLASS:
6954 return gen_rtx_REG (mode, intreg[0]);
6955 case X86_64_SSE_CLASS:
6956 case X86_64_SSESF_CLASS:
6957 case X86_64_SSEDF_CLASS:
6958 if (mode != BLKmode)
6959 return gen_reg_or_parallel (mode, orig_mode,
6960 SSE_REGNO (sse_regno));
6961 break;
6962 case X86_64_X87_CLASS:
6963 case X86_64_COMPLEX_X87_CLASS:
6964 return gen_rtx_REG (mode, FIRST_STACK_REG);
6965 case X86_64_NO_CLASS:
6966 /* Zero sized array, struct or class. */
6967 return NULL;
6968 default:
6969 gcc_unreachable ();
6971 if (n == 2
6972 && regclass[0] == X86_64_SSE_CLASS
6973 && regclass[1] == X86_64_SSEUP_CLASS
6974 && mode != BLKmode)
6975 return gen_reg_or_parallel (mode, orig_mode,
6976 SSE_REGNO (sse_regno));
6977 if (n == 4
6978 && regclass[0] == X86_64_SSE_CLASS
6979 && regclass[1] == X86_64_SSEUP_CLASS
6980 && regclass[2] == X86_64_SSEUP_CLASS
6981 && regclass[3] == X86_64_SSEUP_CLASS
6982 && mode != BLKmode)
6983 return gen_reg_or_parallel (mode, orig_mode,
6984 SSE_REGNO (sse_regno));
6985 if (n == 8
6986 && regclass[0] == X86_64_SSE_CLASS
6987 && regclass[1] == X86_64_SSEUP_CLASS
6988 && regclass[2] == X86_64_SSEUP_CLASS
6989 && regclass[3] == X86_64_SSEUP_CLASS
6990 && regclass[4] == X86_64_SSEUP_CLASS
6991 && regclass[5] == X86_64_SSEUP_CLASS
6992 && regclass[6] == X86_64_SSEUP_CLASS
6993 && regclass[7] == X86_64_SSEUP_CLASS
6994 && mode != BLKmode)
6995 return gen_reg_or_parallel (mode, orig_mode,
6996 SSE_REGNO (sse_regno));
6997 if (n == 2
6998 && regclass[0] == X86_64_X87_CLASS
6999 && regclass[1] == X86_64_X87UP_CLASS)
7000 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
7002 if (n == 2
7003 && regclass[0] == X86_64_INTEGER_CLASS
7004 && regclass[1] == X86_64_INTEGER_CLASS
7005 && (mode == CDImode || mode == TImode)
7006 && intreg[0] + 1 == intreg[1])
7007 return gen_rtx_REG (mode, intreg[0]);
7009 /* Otherwise figure out the entries of the PARALLEL. */
7010 for (i = 0; i < n; i++)
7012 int pos;
7014 switch (regclass[i])
7016 case X86_64_NO_CLASS:
7017 break;
7018 case X86_64_INTEGER_CLASS:
7019 case X86_64_INTEGERSI_CLASS:
7020 /* Merge TImodes on aligned occasions here too. */
7021 if (i * 8 + 8 > bytes)
7022 tmpmode
7023 = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
7024 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
7025 tmpmode = SImode;
7026 else
7027 tmpmode = DImode;
7028 /* We've requested a size for which no
7029 integer mode exists. Use DImode. */
7030 if (tmpmode == BLKmode)
7031 tmpmode = DImode;
7032 exp [nexps++]
7033 = gen_rtx_EXPR_LIST (VOIDmode,
7034 gen_rtx_REG (tmpmode, *intreg),
7035 GEN_INT (i*8));
7036 intreg++;
7037 break;
7038 case X86_64_SSESF_CLASS:
7039 exp [nexps++]
7040 = gen_rtx_EXPR_LIST (VOIDmode,
7041 gen_rtx_REG (SFmode,
7042 SSE_REGNO (sse_regno)),
7043 GEN_INT (i*8));
7044 sse_regno++;
7045 break;
7046 case X86_64_SSEDF_CLASS:
7047 exp [nexps++]
7048 = gen_rtx_EXPR_LIST (VOIDmode,
7049 gen_rtx_REG (DFmode,
7050 SSE_REGNO (sse_regno)),
7051 GEN_INT (i*8));
7052 sse_regno++;
7053 break;
7054 case X86_64_SSE_CLASS:
7055 pos = i;
7056 switch (n)
7058 case 1:
7059 tmpmode = DImode;
7060 break;
7061 case 2:
7062 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
7064 tmpmode = TImode;
7065 i++;
7067 else
7068 tmpmode = DImode;
7069 break;
7070 case 4:
7071 gcc_assert (i == 0
7072 && regclass[1] == X86_64_SSEUP_CLASS
7073 && regclass[2] == X86_64_SSEUP_CLASS
7074 && regclass[3] == X86_64_SSEUP_CLASS);
7075 tmpmode = OImode;
7076 i += 3;
7077 break;
7078 case 8:
7079 gcc_assert (i == 0
7080 && regclass[1] == X86_64_SSEUP_CLASS
7081 && regclass[2] == X86_64_SSEUP_CLASS
7082 && regclass[3] == X86_64_SSEUP_CLASS
7083 && regclass[4] == X86_64_SSEUP_CLASS
7084 && regclass[5] == X86_64_SSEUP_CLASS
7085 && regclass[6] == X86_64_SSEUP_CLASS
7086 && regclass[7] == X86_64_SSEUP_CLASS);
7087 tmpmode = XImode;
7088 i += 7;
7089 break;
7090 default:
7091 gcc_unreachable ();
7093 exp [nexps++]
7094 = gen_rtx_EXPR_LIST (VOIDmode,
7095 gen_rtx_REG (tmpmode,
7096 SSE_REGNO (sse_regno)),
7097 GEN_INT (pos*8));
7098 sse_regno++;
7099 break;
7100 default:
7101 gcc_unreachable ();
7105 /* Empty aligned struct, union or class. */
7106 if (nexps == 0)
7107 return NULL;
7109 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
7110 for (i = 0; i < nexps; i++)
7111 XVECEXP (ret, 0, i) = exp [i];
7112 return ret;
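/* Illustrative sketch, not part of the original source: for the hypothetical
   struct { double d; long l; } passed as the first argument, the PARALLEL
   built above looks roughly like

	(parallel [(expr_list (reg:DF xmm0) (const_int 0))
		   (expr_list (reg:DI di) (const_int 8))])

   i.e. the SSE eightbyte is taken from offset 0 of the value and the
   integer eightbyte from offset 8.  */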
7115 /* Update the data in CUM to advance over an argument of mode MODE
7116 and data type TYPE. (TYPE is null for libcalls where that information
7117 may not be available.) */
7119 static int
7120 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7121 const_tree type, HOST_WIDE_INT bytes,
7122 HOST_WIDE_INT words)
7124 int res = 0;
7126 switch (mode)
7128 default:
7129 break;
7131 case BLKmode:
7132 if (bytes < 0)
7133 break;
7134 /* FALLTHRU */
7136 case DImode:
7137 case SImode:
7138 case HImode:
7139 case QImode:
7140 cum->words += words;
7141 cum->nregs -= words;
7142 cum->regno += words;
7143 if (cum->nregs >= 0)
7144 res = words;
7145 if (cum->nregs <= 0)
7147 cum->nregs = 0;
7148 cum->regno = 0;
7150 break;
7152 case OImode:
7153 /* OImode shouldn't be used directly. */
7154 gcc_unreachable ();
7156 case DFmode:
7157 if (cum->float_in_sse < 2)
7158 break;
7159 case SFmode:
7160 if (cum->float_in_sse < 1)
7161 break;
7162 /* FALLTHRU */
7164 case V8SFmode:
7165 case V8SImode:
7166 case V64QImode:
7167 case V32HImode:
7168 case V16SImode:
7169 case V8DImode:
7170 case V16SFmode:
7171 case V8DFmode:
7172 case V32QImode:
7173 case V16HImode:
7174 case V4DFmode:
7175 case V4DImode:
7176 case TImode:
7177 case V16QImode:
7178 case V8HImode:
7179 case V4SImode:
7180 case V2DImode:
7181 case V4SFmode:
7182 case V2DFmode:
7183 if (!type || !AGGREGATE_TYPE_P (type))
7185 cum->sse_words += words;
7186 cum->sse_nregs -= 1;
7187 cum->sse_regno += 1;
7188 if (cum->sse_nregs <= 0)
7190 cum->sse_nregs = 0;
7191 cum->sse_regno = 0;
7194 break;
7196 case V8QImode:
7197 case V4HImode:
7198 case V2SImode:
7199 case V2SFmode:
7200 case V1TImode:
7201 case V1DImode:
7202 if (!type || !AGGREGATE_TYPE_P (type))
7204 cum->mmx_words += words;
7205 cum->mmx_nregs -= 1;
7206 cum->mmx_regno += 1;
7207 if (cum->mmx_nregs <= 0)
7209 cum->mmx_nregs = 0;
7210 cum->mmx_regno = 0;
7213 break;
7216 return res;
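/* Illustrative sketch, not part of the original source: how the 32-bit
   advance updates CUM for a hypothetical regparm function.  */
#if 0
/* With regparm (3), A is allocated to %eax, B to %edx, C to %ecx and D
   goes on the stack; each advance above decrements cum->nregs and bumps
   cum->regno/cum->words until the register count reaches zero.  */
extern void example_regparm (int a, int b, int c, int d)
     __attribute__ ((regparm (3)));
#endif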
7219 static int
7220 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7221 const_tree type, HOST_WIDE_INT words, bool named)
7223 int int_nregs, sse_nregs, exam;
7225 /* Unnamed 512-bit and 256-bit vector mode parameters are passed on the stack. */
7226 if (!named && (VALID_AVX512F_REG_MODE (mode)
7227 || VALID_AVX256_REG_MODE (mode)))
7228 return 0;
7230 exam = examine_argument (mode, type, 0, &int_nregs, &sse_nregs);
7232 if (exam
7233 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
7235 cum->nregs -= int_nregs;
7236 cum->sse_nregs -= sse_nregs;
7237 cum->regno += int_nregs;
7238 cum->sse_regno += sse_nregs;
7239 return int_nregs;
7241 else
7243 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
7244 cum->words = (cum->words + align - 1) & ~(align - 1);
7245 cum->words += words;
7246 return 0;
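/* Illustrative sketch, not part of the original source: for the SysV x86-64
   ABI, advancing over a struct such as { double; long; } while it still fits
   subtracts one from both cum->nregs and cum->sse_nregs; once
   examine_argument reports that it no longer fits, only cum->words is
   advanced (after alignment) and the argument is passed on the stack.  */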
7250 static int
7251 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
7252 HOST_WIDE_INT words)
7254 /* Otherwise, this should be passed indirectly. */
7255 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
7257 cum->words += words;
7258 if (cum->nregs > 0)
7260 cum->nregs -= 1;
7261 cum->regno += 1;
7262 return 1;
7264 return 0;
7267 /* Update the data in CUM to advance over an argument of mode MODE and
7268 data type TYPE. (TYPE is null for libcalls where that information
7269 may not be available.) */
7271 static void
7272 ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
7273 const_tree type, bool named)
7275 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7276 HOST_WIDE_INT bytes, words;
7277 int nregs;
7279 if (mode == BLKmode)
7280 bytes = int_size_in_bytes (type);
7281 else
7282 bytes = GET_MODE_SIZE (mode);
7283 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7285 if (type)
7286 mode = type_natural_mode (type, NULL, false);
7288 if ((type && POINTER_BOUNDS_TYPE_P (type))
7289 || POINTER_BOUNDS_MODE_P (mode))
7291 /* If we pass bounds in the Bounds Table then just update the remaining bounds count. */
7292 if (cum->bnds_in_bt)
7294 cum->bnds_in_bt--;
7295 return;
7299 /* Update the remaining number of bounds to force. */
7299 if (cum->force_bnd_pass)
7300 cum->force_bnd_pass--;
7302 cum->bnd_regno++;
7304 return;
7307 /* The first arg not going to Bounds Tables resets this counter. */
7308 cum->bnds_in_bt = 0;
7309 /* For unnamed args we always pass bounds to avoid a bounds mismatch when
7310 the passed and received types do not match. If bounds do not follow an
7311 unnamed arg, still pretend the required number of bounds were passed. */
7312 if (cum->force_bnd_pass)
7314 cum->bnd_regno += cum->force_bnd_pass;
7315 cum->force_bnd_pass = 0;
7318 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7319 nregs = function_arg_advance_ms_64 (cum, bytes, words);
7320 else if (TARGET_64BIT)
7321 nregs = function_arg_advance_64 (cum, mode, type, words, named);
7322 else
7323 nregs = function_arg_advance_32 (cum, mode, type, bytes, words);
7325 /* For stdarg we expect bounds to be passed for each value passed
7326 in a register. */
7327 if (cum->stdarg)
7328 cum->force_bnd_pass = nregs;
7329 /* For pointers passed in memory we expect bounds to be passed in the
7330 Bounds Table. */
7331 if (!nregs)
7332 cum->bnds_in_bt = chkp_type_bounds_count (type);
7335 /* Define where to put the arguments to a function.
7336 Value is zero to push the argument on the stack,
7337 or a hard register in which to store the argument.
7339 MODE is the argument's machine mode.
7340 TYPE is the data type of the argument (as a tree).
7341 This is null for libcalls where that information may
7342 not be available.
7343 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7344 the preceding args and about the function being called.
7345 NAMED is nonzero if this argument is a named parameter
7346 (otherwise it is an extra parameter matching an ellipsis). */
7348 static rtx
7349 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7350 enum machine_mode orig_mode, const_tree type,
7351 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
7353 /* Avoid the AL settings for the Unix64 ABI. */
7354 if (mode == VOIDmode)
7355 return constm1_rtx;
7357 switch (mode)
7359 default:
7360 break;
7362 case BLKmode:
7363 if (bytes < 0)
7364 break;
7365 /* FALLTHRU */
7366 case DImode:
7367 case SImode:
7368 case HImode:
7369 case QImode:
7370 if (words <= cum->nregs)
7372 int regno = cum->regno;
7374 /* Fastcall allocates the first two DWORD (SImode) or
7375 smaller arguments to ECX and EDX if they aren't
7376 aggregate types. */
7377 if (cum->fastcall)
7379 if (mode == BLKmode
7380 || mode == DImode
7381 || (type && AGGREGATE_TYPE_P (type)))
7382 break;
7384 /* ECX, not EAX, is the first allocated register. */
7385 if (regno == AX_REG)
7386 regno = CX_REG;
7388 return gen_rtx_REG (mode, regno);
7390 break;
7392 case DFmode:
7393 if (cum->float_in_sse < 2)
7394 break;
7395 case SFmode:
7396 if (cum->float_in_sse < 1)
7397 break;
7398 /* FALLTHRU */
7399 case TImode:
7400 /* In 32-bit mode, we pass TImode in xmm registers. */
7401 case V16QImode:
7402 case V8HImode:
7403 case V4SImode:
7404 case V2DImode:
7405 case V4SFmode:
7406 case V2DFmode:
7407 if (!type || !AGGREGATE_TYPE_P (type))
7409 if (cum->sse_nregs)
7410 return gen_reg_or_parallel (mode, orig_mode,
7411 cum->sse_regno + FIRST_SSE_REG);
7413 break;
7415 case OImode:
7416 case XImode:
7417 /* OImode and XImode shouldn't be used directly. */
7418 gcc_unreachable ();
7420 case V64QImode:
7421 case V32HImode:
7422 case V16SImode:
7423 case V8DImode:
7424 case V16SFmode:
7425 case V8DFmode:
7426 case V8SFmode:
7427 case V8SImode:
7428 case V32QImode:
7429 case V16HImode:
7430 case V4DFmode:
7431 case V4DImode:
7432 if (!type || !AGGREGATE_TYPE_P (type))
7434 if (cum->sse_nregs)
7435 return gen_reg_or_parallel (mode, orig_mode,
7436 cum->sse_regno + FIRST_SSE_REG);
7438 break;
7440 case V8QImode:
7441 case V4HImode:
7442 case V2SImode:
7443 case V2SFmode:
7444 case V1TImode:
7445 case V1DImode:
7446 if (!type || !AGGREGATE_TYPE_P (type))
7448 if (cum->mmx_nregs)
7449 return gen_reg_or_parallel (mode, orig_mode,
7450 cum->mmx_regno + FIRST_MMX_REG);
7452 break;
7455 return NULL_RTX;
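/* Illustrative sketch, not part of the original source: the register choice
   made above for a hypothetical 32-bit fastcall function.  */
#if 0
/* A and B are DWORD-or-smaller scalars, so they are allocated to %ecx and
   %edx as described above; C no longer fits and is pushed on the stack.  */
extern void example_fastcall (int a, int b, int c)
     __attribute__ ((fastcall));
#endif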
7458 static rtx
7459 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7460 enum machine_mode orig_mode, const_tree type, bool named)
7462 /* Handle a hidden AL argument containing the number of registers
7463 for varargs x86-64 functions. */
7464 if (mode == VOIDmode)
7465 return GEN_INT (cum->maybe_vaarg
7466 ? (cum->sse_nregs < 0
7467 ? X86_64_SSE_REGPARM_MAX
7468 : cum->sse_regno)
7469 : -1);
7471 switch (mode)
7473 default:
7474 break;
7476 case V8SFmode:
7477 case V8SImode:
7478 case V32QImode:
7479 case V16HImode:
7480 case V4DFmode:
7481 case V4DImode:
7482 case V16SFmode:
7483 case V16SImode:
7484 case V64QImode:
7485 case V32HImode:
7486 case V8DFmode:
7487 case V8DImode:
7488 /* Unnamed 256-bit and 512-bit vector mode parameters are passed on the stack. */
7489 if (!named)
7490 return NULL;
7491 break;
7494 return construct_container (mode, orig_mode, type, 0, cum->nregs,
7495 cum->sse_nregs,
7496 &x86_64_int_parameter_registers [cum->regno],
7497 cum->sse_regno);
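/* Illustrative sketch, not part of the original source: the VOIDmode case
   above implements the SysV x86-64 rule that, for a varargs call, %al
   carries an upper bound on the number of vector registers used.  For a
   call like printf ("%f", 3.14) the double is passed in %xmm0 and the
   caller sets %al to 1 before the call.  */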
7500 static rtx
7501 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7502 enum machine_mode orig_mode, bool named,
7503 HOST_WIDE_INT bytes)
7505 unsigned int regno;
7507 /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
7508 We use a value of -2 to specify that the current function call is MS ABI. */
7509 if (mode == VOIDmode)
7510 return GEN_INT (-2);
7512 /* If we've run out of registers, it goes on the stack. */
7513 if (cum->nregs == 0)
7514 return NULL_RTX;
7516 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
7518 /* Only floating point modes are passed in anything but integer regs. */
7519 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
7521 if (named)
7522 regno = cum->regno + FIRST_SSE_REG;
7523 else
7525 rtx t1, t2;
7527 /* Unnamed floating parameters are passed in both the
7528 SSE and integer registers. */
7529 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
7530 t2 = gen_rtx_REG (mode, regno);
7531 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
7532 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
7533 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
7536 /* Handle aggregate types passed in registers. */
7537 if (orig_mode == BLKmode)
7539 if (bytes > 0 && bytes <= 8)
7540 mode = (bytes > 4 ? DImode : SImode);
7541 if (mode == BLKmode)
7542 mode = DImode;
7545 return gen_reg_or_parallel (mode, orig_mode, regno);
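/* Illustrative sketch, not part of the original source: under the Microsoft
   x64 convention handled above, argument slots are positional, so for the
   hypothetical prototype below A goes in %rcx, B in %xmm1 (the second slot)
   and C in %r8.  A double passed in the variadic part of a call is placed
   in both the SSE and the integer register of its slot, as built by the
   PARALLEL above.  */
#if 0
extern void example_ms_abi (int a, double b, int c);
#endif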
7548 /* Return where to put the arguments to a function.
7549 Return zero to push the argument on the stack, or a hard register in which to store the argument.
7551 MODE is the argument's machine mode. TYPE is the data type of the
7552 argument. It is null for libcalls where that information may not be
7553 available. CUM gives information about the preceding args and about
7554 the function being called. NAMED is nonzero if this argument is a
7555 named parameter (otherwise it is an extra parameter matching an
7556 ellipsis). */
7558 static rtx
7559 ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode,
7560 const_tree type, bool named)
7562 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7563 enum machine_mode mode = omode;
7564 HOST_WIDE_INT bytes, words;
7565 rtx arg;
7567 if (mode == BLKmode)
7568 bytes = int_size_in_bytes (type);
7569 else
7570 bytes = GET_MODE_SIZE (mode);
7571 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7574 if ((type && POINTER_BOUNDS_TYPE_P (type))
7575 || POINTER_BOUNDS_MODE_P (mode))
7577 if (cum->bnds_in_bt)
7578 arg = NULL;
7579 else if (cum->bnd_regno <= LAST_BND_REG)
7580 arg = gen_rtx_REG (BNDmode, cum->bnd_regno);
7581 else
7582 arg = GEN_INT (cum->bnd_regno - LAST_BND_REG - 1);
7584 else
7586 /* To simplify the code below, represent vector types with a vector mode
7587 even if MMX/SSE are not active. */
7588 if (type && TREE_CODE (type) == VECTOR_TYPE)
7589 mode = type_natural_mode (type, cum, false);
7591 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7592 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
7593 else if (TARGET_64BIT)
7594 arg = function_arg_64 (cum, mode, omode, type, named);
7595 else
7596 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
7599 return arg;
7602 /* A C expression that indicates when an argument must be passed by
7603 reference. If nonzero for an argument, a copy of that argument is
7604 made in memory and a pointer to the argument is passed instead of
7605 the argument itself. The pointer is passed in whatever way is
7606 appropriate for passing a pointer to that type. */
7608 static bool
7609 ix86_pass_by_reference (cumulative_args_t cum_v, enum machine_mode mode,
7610 const_tree type, bool named ATTRIBUTE_UNUSED)
7612 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7614 /* See Windows x64 Software Convention. */
7615 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7617 int msize = (int) GET_MODE_SIZE (mode);
7618 if (type)
7620 /* Arrays are passed by reference. */
7621 if (TREE_CODE (type) == ARRAY_TYPE)
7622 return true;
7624 if (AGGREGATE_TYPE_P (type))
7626 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
7627 are passed by reference. */
7628 msize = int_size_in_bytes (type);
7632 /* __m128 is passed by reference. */
7633 switch (msize) {
7634 case 1: case 2: case 4: case 8:
7635 break;
7636 default:
7637 return true;
7640 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
7641 return true;
7643 return false;
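/* Illustrative sketch, not part of the original source: consequences of the
   Microsoft x64 checks above for a few hypothetical types.  */
#if 0
struct example_8  { long long x; };	/* 8 bytes: passed by value in a register */
struct example_12 { int x, y, z; };	/* 12 bytes: passed by reference */
typedef int example_v4si __attribute__ ((vector_size (16)));
					/* 16-byte vector (__m128-like): passed by reference */
#endif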
7646 /* Return true when TYPE should be 128bit aligned for 32bit argument
7647 passing ABI. XXX: This function is obsolete and is only used for
7648 checking psABI compatibility with previous versions of GCC. */
7650 static bool
7651 ix86_compat_aligned_value_p (const_tree type)
7653 enum machine_mode mode = TYPE_MODE (type);
7654 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
7655 || mode == TDmode
7656 || mode == TFmode
7657 || mode == TCmode)
7658 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
7659 return true;
7660 if (TYPE_ALIGN (type) < 128)
7661 return false;
7663 if (AGGREGATE_TYPE_P (type))
7665 /* Walk the aggregates recursively. */
7666 switch (TREE_CODE (type))
7668 case RECORD_TYPE:
7669 case UNION_TYPE:
7670 case QUAL_UNION_TYPE:
7672 tree field;
7674 /* Walk all the structure fields. */
7675 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7677 if (TREE_CODE (field) == FIELD_DECL
7678 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
7679 return true;
7681 break;
7684 case ARRAY_TYPE:
7685 /* Just for use if some languages pass arrays by value. */
7686 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
7687 return true;
7688 break;
7690 default:
7691 gcc_unreachable ();
7694 return false;
7697 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
7698 XXX: This function is obsolete and is only used for checking psABI
7699 compatibility with previous versions of GCC. */
7701 static unsigned int
7702 ix86_compat_function_arg_boundary (enum machine_mode mode,
7703 const_tree type, unsigned int align)
7705 /* In 32bit, only _Decimal128 and __float128 are aligned to their
7706 natural boundaries. */
7707 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
7709 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
7710 make an exception for SSE modes since these require 128bit
7711 alignment.
7713 The handling here differs from field_alignment. ICC aligns MMX
7714 arguments to 4 byte boundaries, while structure fields are aligned
7715 to 8 byte boundaries. */
7716 if (!type)
7718 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
7719 align = PARM_BOUNDARY;
7721 else
7723 if (!ix86_compat_aligned_value_p (type))
7724 align = PARM_BOUNDARY;
7727 if (align > BIGGEST_ALIGNMENT)
7728 align = BIGGEST_ALIGNMENT;
7729 return align;
7732 /* Return true when TYPE should be 128bit aligned for 32bit argument
7733 passing ABI. */
7735 static bool
7736 ix86_contains_aligned_value_p (const_tree type)
7738 enum machine_mode mode = TYPE_MODE (type);
7740 if (mode == XFmode || mode == XCmode)
7741 return false;
7743 if (TYPE_ALIGN (type) < 128)
7744 return false;
7746 if (AGGREGATE_TYPE_P (type))
7748 /* Walk the aggregates recursively. */
7749 switch (TREE_CODE (type))
7751 case RECORD_TYPE:
7752 case UNION_TYPE:
7753 case QUAL_UNION_TYPE:
7755 tree field;
7757 /* Walk all the structure fields. */
7758 for (field = TYPE_FIELDS (type);
7759 field;
7760 field = DECL_CHAIN (field))
7762 if (TREE_CODE (field) == FIELD_DECL
7763 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
7764 return true;
7766 break;
7769 case ARRAY_TYPE:
7770 /* Just for use if some languages pass arrays by value. */
7771 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
7772 return true;
7773 break;
7775 default:
7776 gcc_unreachable ();
7779 else
7780 return TYPE_ALIGN (type) >= 128;
7782 return false;
7785 /* Gives the alignment boundary, in bits, of an argument with the
7786 specified mode and type. */
7788 static unsigned int
7789 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
7791 unsigned int align;
7792 if (type)
7794 /* Since the main variant type is used for the call, convert the
7795 type to its main variant. */
7796 type = TYPE_MAIN_VARIANT (type);
7797 align = TYPE_ALIGN (type);
7799 else
7800 align = GET_MODE_ALIGNMENT (mode);
7801 if (align < PARM_BOUNDARY)
7802 align = PARM_BOUNDARY;
7803 else
7805 static bool warned;
7806 unsigned int saved_align = align;
7808 if (!TARGET_64BIT)
7810 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
7811 if (!type)
7813 if (mode == XFmode || mode == XCmode)
7814 align = PARM_BOUNDARY;
7816 else if (!ix86_contains_aligned_value_p (type))
7817 align = PARM_BOUNDARY;
7819 if (align < 128)
7820 align = PARM_BOUNDARY;
7823 if (warn_psabi
7824 && !warned
7825 && align != ix86_compat_function_arg_boundary (mode, type,
7826 saved_align))
7828 warned = true;
7829 inform (input_location,
7830 "The ABI for passing parameters with %d-byte"
7831 " alignment has changed in GCC 4.6",
7832 align / BITS_PER_UNIT);
7836 return align;
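/* Illustrative sketch, not part of the original source: for the 32-bit ABI
   the code above keeps PARM_BOUNDARY (32-bit) alignment for scalar arguments
   such as double and long double, while a 16-byte SSE vector argument (e.g.
   a type declared with vector_size (16)) keeps its 128-bit alignment; when
   the result differs from the pre-GCC 4.6 behaviour, the -Wpsabi note above
   is emitted once.  */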
7839 /* Return true if N is a possible register number of function value. */
7841 static bool
7842 ix86_function_value_regno_p (const unsigned int regno)
7844 switch (regno)
7846 case AX_REG:
7847 case DX_REG:
7848 return true;
7849 case DI_REG:
7850 case SI_REG:
7851 return TARGET_64BIT && ix86_abi != MS_ABI;
7853 case FIRST_BND_REG:
7854 return chkp_function_instrumented_p (current_function_decl);
7856 /* Complex values are returned in %st(0)/%st(1) pair. */
7857 case ST0_REG:
7858 case ST1_REG:
7859 /* TODO: The function should depend on current function ABI but
7860 builtins.c would need updating then. Therefore we use the
7861 default ABI. */
7862 if (TARGET_64BIT && ix86_abi == MS_ABI)
7863 return false;
7864 return TARGET_FLOAT_RETURNS_IN_80387;
7866 /* Complex values are returned in %xmm0/%xmm1 pair. */
7867 case XMM0_REG:
7868 case XMM1_REG:
7869 return TARGET_SSE;
7871 case MM0_REG:
7872 if (TARGET_MACHO || TARGET_64BIT)
7873 return false;
7874 return TARGET_MMX;
7877 return false;
7880 /* Define how to find the value returned by a function.
7881 VALTYPE is the data type of the value (as a tree).
7882 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7883 otherwise, FUNC is 0. */
7885 static rtx
7886 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
7887 const_tree fntype, const_tree fn)
7889 unsigned int regno;
7891 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7892 we normally prevent this case when mmx is not available. However
7893 some ABIs may require the result to be returned like DImode. */
7894 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7895 regno = FIRST_MMX_REG;
7897 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7898 we prevent this case when sse is not available. However some ABIs
7899 may require the result to be returned like integer TImode. */
7900 else if (mode == TImode
7901 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7902 regno = FIRST_SSE_REG;
7904 /* 32-byte vector modes in %ymm0. */
7905 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
7906 regno = FIRST_SSE_REG;
7908 /* 64-byte vector modes in %zmm0. */
7909 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
7910 regno = FIRST_SSE_REG;
7912 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7913 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
7914 regno = FIRST_FLOAT_REG;
7915 else
7916 /* Most things go in %eax. */
7917 regno = AX_REG;
7919 /* Override FP return register with %xmm0 for local functions when
7920 SSE math is enabled or for functions with sseregparm attribute. */
7921 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
7923 int sse_level = ix86_function_sseregparm (fntype, fn, false);
7924 if ((sse_level >= 1 && mode == SFmode)
7925 || (sse_level == 2 && mode == DFmode))
7926 regno = FIRST_SSE_REG;
7929 /* OImode shouldn't be used directly. */
7930 gcc_assert (mode != OImode);
7932 return gen_rtx_REG (orig_mode, regno);
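/* Illustrative sketch, not part of the original source: typical 32-bit
   return-register choices made above, for hypothetical declarations.  */
#if 0
extern int example_int (void);		/* returned in %eax */
extern double example_dbl (void);	/* returned in %st(0), or in %xmm0 for
					   sseregparm/local-SSE-math functions */
typedef float example_v4sf __attribute__ ((vector_size (16)));
extern example_v4sf example_vec (void);	/* returned in %xmm0 */
#endif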
7935 static rtx
7936 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
7937 const_tree valtype)
7939 rtx ret;
7941 /* Handle libcalls, which don't provide a type node. */
7942 if (valtype == NULL)
7944 unsigned int regno;
7946 switch (mode)
7948 case SFmode:
7949 case SCmode:
7950 case DFmode:
7951 case DCmode:
7952 case TFmode:
7953 case SDmode:
7954 case DDmode:
7955 case TDmode:
7956 regno = FIRST_SSE_REG;
7957 break;
7958 case XFmode:
7959 case XCmode:
7960 regno = FIRST_FLOAT_REG;
7961 break;
7962 case TCmode:
7963 return NULL;
7964 default:
7965 regno = AX_REG;
7968 return gen_rtx_REG (mode, regno);
7970 else if (POINTER_TYPE_P (valtype))
7972 /* Pointers are always returned in word_mode. */
7973 mode = word_mode;
7976 ret = construct_container (mode, orig_mode, valtype, 1,
7977 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
7978 x86_64_int_return_registers, 0);
7980 /* For zero sized structures, construct_container returns NULL, but we
7981 need to keep the rest of the compiler happy by returning a meaningful value. */
7982 if (!ret)
7983 ret = gen_rtx_REG (orig_mode, AX_REG);
7985 return ret;
7988 static rtx
7989 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode,
7990 const_tree valtype)
7992 unsigned int regno = AX_REG;
7994 if (TARGET_SSE)
7996 switch (GET_MODE_SIZE (mode))
7998 case 16:
7999 if (valtype != NULL_TREE
8000 && !VECTOR_INTEGER_TYPE_P (valtype)
8002 && !INTEGRAL_TYPE_P (valtype)
8003 && !VECTOR_FLOAT_TYPE_P (valtype))
8004 break;
8005 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8006 && !COMPLEX_MODE_P (mode))
8007 regno = FIRST_SSE_REG;
8008 break;
8009 case 8:
8010 case 4:
8011 if (mode == SFmode || mode == DFmode)
8012 regno = FIRST_SSE_REG;
8013 break;
8014 default:
8015 break;
8018 return gen_rtx_REG (orig_mode, regno);
8021 static rtx
8022 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
8023 enum machine_mode orig_mode, enum machine_mode mode)
8025 const_tree fn, fntype;
8027 fn = NULL_TREE;
8028 if (fntype_or_decl && DECL_P (fntype_or_decl))
8029 fn = fntype_or_decl;
8030 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
8032 if ((valtype && POINTER_BOUNDS_TYPE_P (valtype))
8033 || POINTER_BOUNDS_MODE_P (mode))
8034 return gen_rtx_REG (BNDmode, FIRST_BND_REG);
8035 else if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
8036 return function_value_ms_64 (orig_mode, mode, valtype);
8037 else if (TARGET_64BIT)
8038 return function_value_64 (orig_mode, mode, valtype);
8039 else
8040 return function_value_32 (orig_mode, mode, fntype, fn);
8043 static rtx
8044 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
8045 bool outgoing ATTRIBUTE_UNUSED)
8047 enum machine_mode mode, orig_mode;
8049 orig_mode = TYPE_MODE (valtype);
8050 mode = type_natural_mode (valtype, NULL, true);
8051 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
8054 static rtx
8055 ix86_function_value_bounds (const_tree valtype,
8056 const_tree fntype_or_decl ATTRIBUTE_UNUSED,
8057 bool outgoing ATTRIBUTE_UNUSED)
8059 rtx res = NULL_RTX;
8061 if (BOUNDED_TYPE_P (valtype))
8062 res = gen_rtx_REG (BNDmode, FIRST_BND_REG);
8063 else if (chkp_type_has_pointer (valtype))
8065 bitmap slots = chkp_find_bound_slots (valtype);
8066 rtx bounds[2];
8067 bitmap_iterator bi;
8068 unsigned i, bnd_no = 0;
8070 EXECUTE_IF_SET_IN_BITMAP (slots, 0, i, bi)
8072 rtx reg = gen_rtx_REG (BNDmode, FIRST_BND_REG + bnd_no);
8073 rtx offs = GEN_INT (i * POINTER_SIZE / BITS_PER_UNIT);
8074 gcc_assert (bnd_no < 2);
8075 bounds[bnd_no++] = gen_rtx_EXPR_LIST (VOIDmode, reg, offs);
8078 res = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (bnd_no, bounds));
8079 BITMAP_FREE (slots);
8081 else
8082 res = NULL_RTX;
8084 return res;
8087 /* Pointer function arguments and return values are promoted to
8088 word_mode. */
8090 static enum machine_mode
8091 ix86_promote_function_mode (const_tree type, enum machine_mode mode,
8092 int *punsignedp, const_tree fntype,
8093 int for_return)
8095 if (type != NULL_TREE && POINTER_TYPE_P (type))
8097 *punsignedp = POINTERS_EXTEND_UNSIGNED;
8098 return word_mode;
8100 return default_promote_function_mode (type, mode, punsignedp, fntype,
8101 for_return);
8104 /* Return true if a structure, union or array with MODE containing FIELD
8105 should be accessed using BLKmode. */
8107 static bool
8108 ix86_member_type_forces_blk (const_tree field, enum machine_mode mode)
8110 /* Union with XFmode must be in BLKmode. */
8111 return (mode == XFmode
8112 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
8113 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
8117 ix86_libcall_value (enum machine_mode mode)
8119 return ix86_function_value_1 (NULL, NULL, mode, mode);
8122 /* Return true iff type is returned in memory. */
8124 static bool ATTRIBUTE_UNUSED
8125 return_in_memory_32 (const_tree type, enum machine_mode mode)
8127 HOST_WIDE_INT size;
8129 if (mode == BLKmode)
8130 return true;
8132 size = int_size_in_bytes (type);
8134 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
8135 return false;
8137 if (VECTOR_MODE_P (mode) || mode == TImode)
8139 /* User-created vectors small enough to fit in EAX. */
8140 if (size < 8)
8141 return false;
8143 /* MMX/3dNow values are returned in MM0,
8144 except when it doesn't exist or the ABI prescribes otherwise. */
8145 if (size == 8)
8146 return !TARGET_MMX || TARGET_VECT8_RETURNS;
8148 /* SSE values are returned in XMM0, except when it doesn't exist. */
8149 if (size == 16)
8150 return !TARGET_SSE;
8152 /* AVX values are returned in YMM0, except when it doesn't exist. */
8153 if (size == 32)
8154 return !TARGET_AVX;
8156 /* AVX512F values are returned in ZMM0, except when it doesn't exist. */
8157 if (size == 64)
8158 return !TARGET_AVX512F;
8161 if (mode == XFmode)
8162 return false;
8164 if (size > 12)
8165 return true;
8167 /* OImode shouldn't be used directly. */
8168 gcc_assert (mode != OImode);
8170 return false;
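/* Illustrative sketch, not part of the original source: outcomes of the
   32-bit checks above for a few hypothetical return types (other criteria,
   such as -fpcc-struct-return, are applied elsewhere by the middle end).  */
#if 0
typedef float example_v4sf __attribute__ ((vector_size (16)));
					/* returned in %xmm0 when SSE is enabled,
					   otherwise in memory */
extern long double example_ld (void);	/* XFmode: never forced to memory here */
struct example_big { int a[5]; };	/* 20 bytes, BLKmode: returned in memory */
#endif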
8173 static bool ATTRIBUTE_UNUSED
8174 return_in_memory_64 (const_tree type, enum machine_mode mode)
8176 int needed_intregs, needed_sseregs;
8177 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
8180 static bool ATTRIBUTE_UNUSED
8181 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
8183 HOST_WIDE_INT size = int_size_in_bytes (type);
8185 /* __m128 is returned in xmm0. */
8186 if ((!type || VECTOR_INTEGER_TYPE_P (type) || INTEGRAL_TYPE_P (type)
8187 || VECTOR_FLOAT_TYPE_P (type))
8188 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8189 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
8190 return false;
8192 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
8193 return size != 1 && size != 2 && size != 4 && size != 8;
8196 static bool
8197 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
8199 #ifdef SUBTARGET_RETURN_IN_MEMORY
8200 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
8201 #else
8202 const enum machine_mode mode = type_natural_mode (type, NULL, true);
8204 if (POINTER_BOUNDS_TYPE_P (type))
8205 return false;
8207 if (TARGET_64BIT)
8209 if (ix86_function_type_abi (fntype) == MS_ABI)
8210 return return_in_memory_ms_64 (type, mode);
8211 else
8212 return return_in_memory_64 (type, mode);
8214 else
8215 return return_in_memory_32 (type, mode);
8216 #endif
8220 /* Create the va_list data type. */
8222 /* Returns the calling convention specific va_list data type.
8223 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
8225 static tree
8226 ix86_build_builtin_va_list_abi (enum calling_abi abi)
8228 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
8230 /* For i386 we use a plain pointer to the argument area. */
8231 if (!TARGET_64BIT || abi == MS_ABI)
8232 return build_pointer_type (char_type_node);
8234 record = lang_hooks.types.make_type (RECORD_TYPE);
8235 type_decl = build_decl (BUILTINS_LOCATION,
8236 TYPE_DECL, get_identifier ("__va_list_tag"), record);
8238 f_gpr = build_decl (BUILTINS_LOCATION,
8239 FIELD_DECL, get_identifier ("gp_offset"),
8240 unsigned_type_node);
8241 f_fpr = build_decl (BUILTINS_LOCATION,
8242 FIELD_DECL, get_identifier ("fp_offset"),
8243 unsigned_type_node);
8244 f_ovf = build_decl (BUILTINS_LOCATION,
8245 FIELD_DECL, get_identifier ("overflow_arg_area"),
8246 ptr_type_node);
8247 f_sav = build_decl (BUILTINS_LOCATION,
8248 FIELD_DECL, get_identifier ("reg_save_area"),
8249 ptr_type_node);
8251 va_list_gpr_counter_field = f_gpr;
8252 va_list_fpr_counter_field = f_fpr;
8254 DECL_FIELD_CONTEXT (f_gpr) = record;
8255 DECL_FIELD_CONTEXT (f_fpr) = record;
8256 DECL_FIELD_CONTEXT (f_ovf) = record;
8257 DECL_FIELD_CONTEXT (f_sav) = record;
8259 TYPE_STUB_DECL (record) = type_decl;
8260 TYPE_NAME (record) = type_decl;
8261 TYPE_FIELDS (record) = f_gpr;
8262 DECL_CHAIN (f_gpr) = f_fpr;
8263 DECL_CHAIN (f_fpr) = f_ovf;
8264 DECL_CHAIN (f_ovf) = f_sav;
8266 layout_type (record);
8268 /* The correct type is an array type of one element. */
8269 return build_array_type (record, build_index_type (size_zero_node));
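/* Illustrative sketch, not part of the original source: the record built
   above corresponds to the familiar SysV x86-64 va_list layout.  */
#if 0
typedef struct example_va_list_tag
{
  unsigned int gp_offset;	/* offset into the register save area for the next GP arg */
  unsigned int fp_offset;	/* offset into the register save area for the next FP arg */
  void *overflow_arg_area;	/* arguments passed on the stack */
  void *reg_save_area;		/* register save area set up by the prologue */
} example_va_list[1];		/* an array type of one element, as returned above */
#endif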
8272 /* Setup the builtin va_list data type and for 64-bit the additional
8273 calling convention specific va_list data types. */
8275 static tree
8276 ix86_build_builtin_va_list (void)
8278 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
8280 /* Initialize abi specific va_list builtin types. */
8281 if (TARGET_64BIT)
8283 tree t;
8284 if (ix86_abi == MS_ABI)
8286 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
8287 if (TREE_CODE (t) != RECORD_TYPE)
8288 t = build_variant_type_copy (t);
8289 sysv_va_list_type_node = t;
8291 else
8293 t = ret;
8294 if (TREE_CODE (t) != RECORD_TYPE)
8295 t = build_variant_type_copy (t);
8296 sysv_va_list_type_node = t;
8298 if (ix86_abi != MS_ABI)
8300 t = ix86_build_builtin_va_list_abi (MS_ABI);
8301 if (TREE_CODE (t) != RECORD_TYPE)
8302 t = build_variant_type_copy (t);
8303 ms_va_list_type_node = t;
8305 else
8307 t = ret;
8308 if (TREE_CODE (t) != RECORD_TYPE)
8309 t = build_variant_type_copy (t);
8310 ms_va_list_type_node = t;
8314 return ret;
8317 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
8319 static void
8320 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
8322 rtx save_area, mem;
8323 alias_set_type set;
8324 int i, max;
8326 /* GPR size of varargs save area. */
8327 if (cfun->va_list_gpr_size)
8328 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
8329 else
8330 ix86_varargs_gpr_size = 0;
8332 /* FPR size of varargs save area. We don't need it if we don't pass
8333 anything in SSE registers. */
8334 if (TARGET_SSE && cfun->va_list_fpr_size)
8335 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
8336 else
8337 ix86_varargs_fpr_size = 0;
8339 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
8340 return;
8342 save_area = frame_pointer_rtx;
8343 set = get_varargs_alias_set ();
8345 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8346 if (max > X86_64_REGPARM_MAX)
8347 max = X86_64_REGPARM_MAX;
8349 for (i = cum->regno; i < max; i++)
8351 mem = gen_rtx_MEM (word_mode,
8352 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
8353 MEM_NOTRAP_P (mem) = 1;
8354 set_mem_alias_set (mem, set);
8355 emit_move_insn (mem,
8356 gen_rtx_REG (word_mode,
8357 x86_64_int_parameter_registers[i]));
8360 if (ix86_varargs_fpr_size)
8362 enum machine_mode smode;
8363 rtx label, test;
8365 /* Now emit code to save SSE registers. The AX parameter contains the number
8366 of SSE parameter registers used to call this function, though all we
8367 actually check here is the zero/non-zero status. */
8369 label = gen_label_rtx ();
8370 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
8371 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
8372 label));
8374 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
8375 we used movdqa (i.e. TImode) instead? Perhaps even better would
8376 be if we could determine the real mode of the data, via a hook
8377 into pass_stdarg. Ignore all that for now. */
8378 smode = V4SFmode;
8379 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
8380 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
8382 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
8383 if (max > X86_64_SSE_REGPARM_MAX)
8384 max = X86_64_SSE_REGPARM_MAX;
8386 for (i = cum->sse_regno; i < max; ++i)
8388 mem = plus_constant (Pmode, save_area,
8389 i * 16 + ix86_varargs_gpr_size);
8390 mem = gen_rtx_MEM (smode, mem);
8391 MEM_NOTRAP_P (mem) = 1;
8392 set_mem_alias_set (mem, set);
8393 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
8395 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
8398 emit_label (label);
8402 static void
8403 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
8405 alias_set_type set = get_varargs_alias_set ();
8406 int i;
8408 /* Reset to zero, as there might be a SysV va_arg used
8409 before. */
8410 ix86_varargs_gpr_size = 0;
8411 ix86_varargs_fpr_size = 0;
8413 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
8415 rtx reg, mem;
8417 mem = gen_rtx_MEM (Pmode,
8418 plus_constant (Pmode, virtual_incoming_args_rtx,
8419 i * UNITS_PER_WORD));
8420 MEM_NOTRAP_P (mem) = 1;
8421 set_mem_alias_set (mem, set);
8423 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
8424 emit_move_insn (mem, reg);
8428 static void
8429 ix86_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
8430 tree type, int *pretend_size ATTRIBUTE_UNUSED,
8431 int no_rtl)
8433 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8434 CUMULATIVE_ARGS next_cum;
8435 tree fntype;
8437 /* This argument doesn't appear to be used anymore, which is good,
8438 because the old code here didn't suppress rtl generation. */
8439 gcc_assert (!no_rtl);
8441 if (!TARGET_64BIT)
8442 return;
8444 fntype = TREE_TYPE (current_function_decl);
8446 /* For varargs, we do not want to skip the dummy va_dcl argument.
8447 For stdargs, we do want to skip the last named argument. */
8448 next_cum = *cum;
8449 if (stdarg_p (fntype))
8450 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8451 true);
8453 if (cum->call_abi == MS_ABI)
8454 setup_incoming_varargs_ms_64 (&next_cum);
8455 else
8456 setup_incoming_varargs_64 (&next_cum);
8459 static void
8460 ix86_setup_incoming_vararg_bounds (cumulative_args_t cum_v,
8461 enum machine_mode mode,
8462 tree type,
8463 int *pretend_size ATTRIBUTE_UNUSED,
8464 int no_rtl)
8466 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8467 CUMULATIVE_ARGS next_cum;
8468 tree fntype;
8469 rtx save_area;
8470 int bnd_reg, i, max;
8472 gcc_assert (!no_rtl);
8474 if (!TARGET_64BIT)
8475 return;
8477 fntype = TREE_TYPE (current_function_decl);
8479 /* For varargs, we do not want to skip the dummy va_dcl argument.
8480 For stdargs, we do want to skip the last named argument. */
8481 next_cum = *cum;
8482 if (stdarg_p (fntype))
8483 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8484 true);
8485 if (cum->call_abi == MS_ABI)
8486 return;
8488 save_area = frame_pointer_rtx;
8490 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8491 if (max > X86_64_REGPARM_MAX)
8492 max = X86_64_REGPARM_MAX;
8494 bnd_reg = cum->bnd_regno + cum->force_bnd_pass;
8495 if (chkp_function_instrumented_p (current_function_decl))
8496 for (i = cum->regno; i < max; i++)
8498 rtx addr = plus_constant (Pmode, save_area, i * UNITS_PER_WORD);
8499 rtx reg = gen_rtx_REG (DImode,
8500 x86_64_int_parameter_registers[i]);
8501 rtx ptr = reg;
8502 rtx bounds;
8504 if (bnd_reg <= LAST_BND_REG)
8505 bounds = gen_rtx_REG (BNDmode, bnd_reg);
8506 else
8508 rtx ldx_addr = plus_constant (Pmode, arg_pointer_rtx,
8509 (LAST_BND_REG - bnd_reg) * 8);
8510 bounds = gen_reg_rtx (BNDmode);
8511 emit_insn (TARGET_64BIT
8512 ? gen_bnd64_ldx (bounds, ldx_addr, ptr)
8513 : gen_bnd32_ldx (bounds, ldx_addr, ptr));
8516 emit_insn (TARGET_64BIT
8517 ? gen_bnd64_stx (addr, ptr, bounds)
8518 : gen_bnd32_stx (addr, ptr, bounds));
8520 bnd_reg++;
8525 /* Checks whether TYPE is a va_list of the plain char * kind. */
8527 static bool
8528 is_va_list_char_pointer (tree type)
8530 tree canonic;
8532 /* For 32-bit it is always true. */
8533 if (!TARGET_64BIT)
8534 return true;
8535 canonic = ix86_canonical_va_list_type (type);
8536 return (canonic == ms_va_list_type_node
8537 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
8540 /* Implement va_start. */
8542 static void
8543 ix86_va_start (tree valist, rtx nextarg)
8545 HOST_WIDE_INT words, n_gpr, n_fpr;
8546 tree f_gpr, f_fpr, f_ovf, f_sav;
8547 tree gpr, fpr, ovf, sav, t, t1;
8548 tree type;
8549 rtx ovf_rtx;
8551 if (flag_split_stack
8552 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8554 unsigned int scratch_regno;
8556 /* When we are splitting the stack, we can't refer to the stack
8557 arguments using internal_arg_pointer, because they may be on
8558 the old stack. The split stack prologue will arrange to
8559 leave a pointer to the old stack arguments in a scratch
8560 register, which we here copy to a pseudo-register. The split
8561 stack prologue can't set the pseudo-register directly because
8562 it (the prologue) runs before any registers have been saved. */
8564 scratch_regno = split_stack_prologue_scratch_regno ();
8565 if (scratch_regno != INVALID_REGNUM)
8567 rtx reg, seq;
8569 reg = gen_reg_rtx (Pmode);
8570 cfun->machine->split_stack_varargs_pointer = reg;
8572 start_sequence ();
8573 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
8574 seq = get_insns ();
8575 end_sequence ();
8577 push_topmost_sequence ();
8578 emit_insn_after (seq, entry_of_function ());
8579 pop_topmost_sequence ();
8583 /* Only the 64-bit target needs something special. */
8584 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8586 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8587 std_expand_builtin_va_start (valist, nextarg);
8588 else
8590 rtx va_r, next;
8592 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
8593 next = expand_binop (ptr_mode, add_optab,
8594 cfun->machine->split_stack_varargs_pointer,
8595 crtl->args.arg_offset_rtx,
8596 NULL_RTX, 0, OPTAB_LIB_WIDEN);
8597 convert_move (va_r, next, 0);
8599 /* Store zero bounds for va_list. */
8600 if (chkp_function_instrumented_p (current_function_decl))
8601 chkp_expand_bounds_reset_for_mem (valist,
8602 make_tree (TREE_TYPE (valist),
8603 next));
8606 return;
8609 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8610 f_fpr = DECL_CHAIN (f_gpr);
8611 f_ovf = DECL_CHAIN (f_fpr);
8612 f_sav = DECL_CHAIN (f_ovf);
8614 valist = build_simple_mem_ref (valist);
8615 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
8616 /* The following should be folded into the MEM_REF offset. */
8617 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
8618 f_gpr, NULL_TREE);
8619 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
8620 f_fpr, NULL_TREE);
8621 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
8622 f_ovf, NULL_TREE);
8623 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
8624 f_sav, NULL_TREE);
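/* For reference, the four fields fetched above follow the va_list layout
   that the x86-64 SysV psABI specifies (a sketch, not GCC's own type
   definition):

       typedef struct {
         unsigned int gp_offset;    // offset into reg_save_area for GPR args
         unsigned int fp_offset;    // offset into reg_save_area for SSE args
         void *overflow_arg_area;   // next argument passed in memory
         void *reg_save_area;       // base of the register save area
       } va_list[1];  */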
8626 /* Count number of gp and fp argument registers used. */
8627 words = crtl->args.info.words;
8628 n_gpr = crtl->args.info.regno;
8629 n_fpr = crtl->args.info.sse_regno;
8631 if (cfun->va_list_gpr_size)
8633 type = TREE_TYPE (gpr);
8634 t = build2 (MODIFY_EXPR, type,
8635 gpr, build_int_cst (type, n_gpr * 8));
8636 TREE_SIDE_EFFECTS (t) = 1;
8637 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8640 if (TARGET_SSE && cfun->va_list_fpr_size)
8642 type = TREE_TYPE (fpr);
8643 t = build2 (MODIFY_EXPR, type, fpr,
8644 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
8645 TREE_SIDE_EFFECTS (t) = 1;
8646 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
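/* Worked example of the two initializations above, assuming the usual
   SysV values X86_64_REGPARM_MAX == 6 and a 16-byte SSE save slot: for
   int f (int a, double b, ...) we have n_gpr == 1 and n_fpr == 1 at the
   first variable argument, so va_start sets gp_offset = 1 * 8 = 8 and
   fp_offset = 1 * 16 + 8 * 6 = 64, the offsets of the first unused GPR
   and SSE slots within reg_save_area.  */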
8649 /* Find the overflow area. */
8650 type = TREE_TYPE (ovf);
8651 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8652 ovf_rtx = crtl->args.internal_arg_pointer;
8653 else
8654 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
8655 t = make_tree (type, ovf_rtx);
8656 if (words != 0)
8657 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
8658 t1 = t;
8659 t = build2 (MODIFY_EXPR, type, ovf, t);
8660 TREE_SIDE_EFFECTS (t) = 1;
8661 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8663 /* Store zero bounds for overflow area pointer. */
8664 if (chkp_function_instrumented_p (current_function_decl))
8665 chkp_expand_bounds_reset_for_mem (ovf, t1);
8667 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
8669 /* Find the register save area.
8670 The function prologue saves it right above the stack frame. */
8671 type = TREE_TYPE (sav);
8672 t = make_tree (type, frame_pointer_rtx);
8673 if (!ix86_varargs_gpr_size)
8674 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
8675 t1 = t;
8676 t = build2 (MODIFY_EXPR, type, sav, t);
8677 TREE_SIDE_EFFECTS (t) = 1;
8678 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8680 /* Store zero bounds for save area pointer. */
8681 if (chkp_function_instrumented_p (current_function_decl))
8682 chkp_expand_bounds_reset_for_mem (sav, t1);
8686 /* Implement va_arg. */
8688 static tree
8689 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
8690 gimple_seq *post_p)
8692 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
8693 tree f_gpr, f_fpr, f_ovf, f_sav;
8694 tree gpr, fpr, ovf, sav, t;
8695 int size, rsize;
8696 tree lab_false, lab_over = NULL_TREE;
8697 tree addr, t2;
8698 rtx container;
8699 int indirect_p = 0;
8700 tree ptrtype;
8701 enum machine_mode nat_mode;
8702 unsigned int arg_boundary;
8704 /* Only 64bit target needs something special. */
8705 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8706 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
8708 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8709 f_fpr = DECL_CHAIN (f_gpr);
8710 f_ovf = DECL_CHAIN (f_fpr);
8711 f_sav = DECL_CHAIN (f_ovf);
8713 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
8714 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
8715 valist = build_va_arg_indirect_ref (valist);
8716 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
8717 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
8718 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
8720 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
8721 if (indirect_p)
8722 type = build_pointer_type (type);
8723 size = int_size_in_bytes (type);
8724 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
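/* E.g. with UNITS_PER_WORD == 8, a 12-byte argument gives
   rsize == (12 + 7) / 8 == 2, so the overflow-area pointer is later
   advanced by two full words.  */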
8726 nat_mode = type_natural_mode (type, NULL, false);
8727 switch (nat_mode)
8729 case V8SFmode:
8730 case V8SImode:
8731 case V32QImode:
8732 case V16HImode:
8733 case V4DFmode:
8734 case V4DImode:
8735 case V16SFmode:
8736 case V16SImode:
8737 case V64QImode:
8738 case V32HImode:
8739 case V8DFmode:
8740 case V8DImode:
8741 /* Unnamed 256- and 512-bit vector mode parameters are passed on the stack. */
8742 if (!TARGET_64BIT_MS_ABI)
8744 container = NULL;
8745 break;
8748 default:
8749 container = construct_container (nat_mode, TYPE_MODE (type),
8750 type, 0, X86_64_REGPARM_MAX,
8751 X86_64_SSE_REGPARM_MAX, intreg, 0);
8753 break;
8756 /* Pull the value out of the saved registers. */
8758 addr = create_tmp_var (ptr_type_node, "addr");
8760 if (container)
8762 int needed_intregs, needed_sseregs;
8763 bool need_temp;
8764 tree int_addr, sse_addr;
8766 lab_false = create_artificial_label (UNKNOWN_LOCATION);
8767 lab_over = create_artificial_label (UNKNOWN_LOCATION);
8769 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
8771 need_temp = (!REG_P (container)
8772 && ((needed_intregs && TYPE_ALIGN (type) > 64)
8773 || TYPE_ALIGN (type) > 128));
8775 /* If we are passing a structure, verify that it is a consecutive block
8776 in the register save area. If not, we need to do moves. */
8777 if (!need_temp && !REG_P (container))
8779 /* Verify that all registers are strictly consecutive */
8780 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
8782 int i;
8784 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8786 rtx slot = XVECEXP (container, 0, i);
8787 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
8788 || INTVAL (XEXP (slot, 1)) != i * 16)
8789 need_temp = 1;
8792 else
8794 int i;
8796 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8798 rtx slot = XVECEXP (container, 0, i);
8799 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
8800 || INTVAL (XEXP (slot, 1)) != i * 8)
8801 need_temp = 1;
8805 if (!need_temp)
8807 int_addr = addr;
8808 sse_addr = addr;
8810 else
8812 int_addr = create_tmp_var (ptr_type_node, "int_addr");
8813 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
8816 /* First ensure that we fit completely in registers. */
8817 if (needed_intregs)
8819 t = build_int_cst (TREE_TYPE (gpr),
8820 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
8821 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
8822 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8823 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8824 gimplify_and_add (t, pre_p);
8826 if (needed_sseregs)
8828 t = build_int_cst (TREE_TYPE (fpr),
8829 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
8830 + X86_64_REGPARM_MAX * 8);
8831 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
8832 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8833 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8834 gimplify_and_add (t, pre_p);
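/* Worked example of the two range checks above, assuming the usual SysV
   limits X86_64_REGPARM_MAX == 6 and X86_64_SSE_REGPARM_MAX == 8: for a
   single integer argument, needed_intregs == 1, so we jump to lab_false
   once gp_offset >= (6 - 1 + 1) * 8 == 48, i.e. when all six GPR slots
   are used up; a single SSE argument jumps once
   fp_offset >= (8 - 1 + 1) * 16 + 6 * 8 == 176.  */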
8837 /* Compute index to start of area used for integer regs. */
8838 if (needed_intregs)
8840 /* int_addr = gpr + sav; */
8841 t = fold_build_pointer_plus (sav, gpr);
8842 gimplify_assign (int_addr, t, pre_p);
8844 if (needed_sseregs)
8846 /* sse_addr = fpr + sav; */
8847 t = fold_build_pointer_plus (sav, fpr);
8848 gimplify_assign (sse_addr, t, pre_p);
8850 if (need_temp)
8852 int i, prev_size = 0;
8853 tree temp = create_tmp_var (type, "va_arg_tmp");
8855 /* addr = &temp; */
8856 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
8857 gimplify_assign (addr, t, pre_p);
8859 for (i = 0; i < XVECLEN (container, 0); i++)
8861 rtx slot = XVECEXP (container, 0, i);
8862 rtx reg = XEXP (slot, 0);
8863 enum machine_mode mode = GET_MODE (reg);
8864 tree piece_type;
8865 tree addr_type;
8866 tree daddr_type;
8867 tree src_addr, src;
8868 int src_offset;
8869 tree dest_addr, dest;
8870 int cur_size = GET_MODE_SIZE (mode);
8872 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
8873 prev_size = INTVAL (XEXP (slot, 1));
8874 if (prev_size + cur_size > size)
8876 cur_size = size - prev_size;
8877 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
8878 if (mode == BLKmode)
8879 mode = QImode;
8881 piece_type = lang_hooks.types.type_for_mode (mode, 1);
8882 if (mode == GET_MODE (reg))
8883 addr_type = build_pointer_type (piece_type);
8884 else
8885 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8886 true);
8887 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8888 true);
8890 if (SSE_REGNO_P (REGNO (reg)))
8892 src_addr = sse_addr;
8893 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
8895 else
8897 src_addr = int_addr;
8898 src_offset = REGNO (reg) * 8;
8900 src_addr = fold_convert (addr_type, src_addr);
8901 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
8903 dest_addr = fold_convert (daddr_type, addr);
8904 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
8905 if (cur_size == GET_MODE_SIZE (mode))
8907 src = build_va_arg_indirect_ref (src_addr);
8908 dest = build_va_arg_indirect_ref (dest_addr);
8910 gimplify_assign (dest, src, pre_p);
8912 else
8914 tree copy
8915 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
8916 3, dest_addr, src_addr,
8917 size_int (cur_size));
8918 gimplify_and_add (copy, pre_p);
8920 prev_size += cur_size;
8924 if (needed_intregs)
8926 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
8927 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
8928 gimplify_assign (gpr, t, pre_p);
8931 if (needed_sseregs)
8933 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
8934 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
8935 gimplify_assign (fpr, t, pre_p);
8938 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
8940 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
8943 /* ... otherwise out of the overflow area. */
8945 /* When the caller aligns a parameter on the stack, an alignment beyond
8946 MAX_SUPPORTED_STACK_ALIGNMENT is clamped to
8947 MAX_SUPPORTED_STACK_ALIGNMENT. Match that caller behavior here
8948 in the callee. */
8949 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
8950 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
8951 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
8953 /* Care for on-stack alignment if needed. */
8954 if (arg_boundary <= 64 || size == 0)
8955 t = ovf;
8956 else
8958 HOST_WIDE_INT align = arg_boundary / 8;
8959 t = fold_build_pointer_plus_hwi (ovf, align - 1);
8960 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
8961 build_int_cst (TREE_TYPE (t), -align));
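/* The two statements above are the usual round-up-to-alignment idiom:
   e.g. for a 32-byte aligned type, align == 32, and an overflow pointer
   of 0x1008 the result is (0x1008 + 31) & -32 == 0x1020.  */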
8964 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
8965 gimplify_assign (addr, t, pre_p);
8967 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
8968 gimplify_assign (unshare_expr (ovf), t, pre_p);
8970 if (container)
8971 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
8973 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
8974 addr = fold_convert (ptrtype, addr);
8976 if (indirect_p)
8977 addr = build_va_arg_indirect_ref (addr);
8978 return build_va_arg_indirect_ref (addr);
8981 /* Return true if OPNUM's MEM should be matched
8982 in movabs* patterns. */
8984 bool
8985 ix86_check_movabs (rtx insn, int opnum)
8987 rtx set, mem;
8989 set = PATTERN (insn);
8990 if (GET_CODE (set) == PARALLEL)
8991 set = XVECEXP (set, 0, 0);
8992 gcc_assert (GET_CODE (set) == SET);
8993 mem = XEXP (set, opnum);
8994 while (GET_CODE (mem) == SUBREG)
8995 mem = SUBREG_REG (mem);
8996 gcc_assert (MEM_P (mem));
8997 return volatile_ok || !MEM_VOLATILE_P (mem);
9000 /* Initialize the table of extra 80387 mathematical constants. */
9002 static void
9003 init_ext_80387_constants (void)
9005 static const char * cst[5] =
9007 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
9008 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
9009 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
9010 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
9011 "3.1415926535897932385128089594061862044", /* 4: fldpi */
9013 int i;
9015 for (i = 0; i < 5; i++)
9017 real_from_string (&ext_80387_constants_table[i], cst[i]);
9018 /* Ensure each constant is rounded to XFmode precision. */
9019 real_convert (&ext_80387_constants_table[i],
9020 XFmode, &ext_80387_constants_table[i]);
9023 ext_80387_constants_init = 1;
9026 /* Return non-zero if the constant is something that
9027 can be loaded with a special instruction. */
9030 int standard_80387_constant_p (rtx x)
9032 enum machine_mode mode = GET_MODE (x);
9034 REAL_VALUE_TYPE r;
9036 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
9037 return -1;
9039 if (x == CONST0_RTX (mode))
9040 return 1;
9041 if (x == CONST1_RTX (mode))
9042 return 2;
9044 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9046 /* For XFmode constants, try to find a special 80387 instruction when
9047 optimizing for size or on those CPUs that benefit from them. */
9048 if (mode == XFmode
9049 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
9051 int i;
9053 if (! ext_80387_constants_init)
9054 init_ext_80387_constants ();
9056 for (i = 0; i < 5; i++)
9057 if (real_identical (&r, &ext_80387_constants_table[i]))
9058 return i + 3;
9061 /* Load of the constant -0.0 or -1.0 will be split as
9062 fldz;fchs or fld1;fchs sequence. */
9063 if (real_isnegzero (&r))
9064 return 8;
9065 if (real_identical (&r, &dconstm1))
9066 return 9;
9068 return 0;
9071 /* Return the opcode of the special instruction to be used to load
9072 the constant X. */
9074 const char *
9075 standard_80387_constant_opcode (rtx x)
9077 switch (standard_80387_constant_p (x))
9079 case 1:
9080 return "fldz";
9081 case 2:
9082 return "fld1";
9083 case 3:
9084 return "fldlg2";
9085 case 4:
9086 return "fldln2";
9087 case 5:
9088 return "fldl2e";
9089 case 6:
9090 return "fldl2t";
9091 case 7:
9092 return "fldpi";
9093 case 8:
9094 case 9:
9095 return "#";
9096 default:
9097 gcc_unreachable ();
9101 /* Return the CONST_DOUBLE representing the 80387 constant that is
9102 loaded by the specified special instruction. The argument IDX
9103 matches the return value from standard_80387_constant_p. */
9106 rtx standard_80387_constant_rtx (int idx)
9108 int i;
9110 if (! ext_80387_constants_init)
9111 init_ext_80387_constants ();
9113 switch (idx)
9115 case 3:
9116 case 4:
9117 case 5:
9118 case 6:
9119 case 7:
9120 i = idx - 3;
9121 break;
9123 default:
9124 gcc_unreachable ();
9127 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
9128 XFmode);
9131 /* Return 1 if X is all zeros and 2 if X is all ones,
9132 in a supported SSE/AVX vector mode. */
9135 int standard_sse_constant_p (rtx x)
9137 enum machine_mode mode = GET_MODE (x);
9139 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
9140 return 1;
9141 if (vector_all_ones_operand (x, mode))
9142 switch (mode)
9144 case V16QImode:
9145 case V8HImode:
9146 case V4SImode:
9147 case V2DImode:
9148 if (TARGET_SSE2)
9149 return 2;
9150 case V32QImode:
9151 case V16HImode:
9152 case V8SImode:
9153 case V4DImode:
9154 if (TARGET_AVX2)
9155 return 2;
9156 case V64QImode:
9157 case V32HImode:
9158 case V16SImode:
9159 case V8DImode:
9160 if (TARGET_AVX512F)
9161 return 2;
9162 default:
9163 break;
9166 return 0;
9169 /* Return the opcode of the special instruction to be used to load
9170 the constant X. */
9172 const char *
9173 standard_sse_constant_opcode (rtx insn, rtx x)
9175 switch (standard_sse_constant_p (x))
9177 case 1:
9178 switch (get_attr_mode (insn))
9180 case MODE_XI:
9181 case MODE_V16SF:
9182 return "vpxord\t%g0, %g0, %g0";
9183 case MODE_V8DF:
9184 return "vpxorq\t%g0, %g0, %g0";
9185 case MODE_TI:
9186 return "%vpxor\t%0, %d0";
9187 case MODE_V2DF:
9188 return "%vxorpd\t%0, %d0";
9189 case MODE_V4SF:
9190 return "%vxorps\t%0, %d0";
9192 case MODE_OI:
9193 return "vpxor\t%x0, %x0, %x0";
9194 case MODE_V4DF:
9195 return "vxorpd\t%x0, %x0, %x0";
9196 case MODE_V8SF:
9197 return "vxorps\t%x0, %x0, %x0";
9199 default:
9200 break;
9203 case 2:
9204 if (get_attr_mode (insn) == MODE_XI
9205 || get_attr_mode (insn) == MODE_V8DF
9206 || get_attr_mode (insn) == MODE_V16SF)
9207 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
9208 if (TARGET_AVX)
9209 return "vpcmpeqd\t%0, %0, %0";
9210 else
9211 return "pcmpeqd\t%0, %0";
9213 default:
9214 break;
9216 gcc_unreachable ();
9219 /* Return true if OP contains a symbol reference. */
9221 bool
9222 symbolic_reference_mentioned_p (rtx op)
9224 const char *fmt;
9225 int i;
9227 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
9228 return true;
9230 fmt = GET_RTX_FORMAT (GET_CODE (op));
9231 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
9233 if (fmt[i] == 'E')
9235 int j;
9237 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
9238 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
9239 return true;
9242 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
9243 return true;
9246 return false;
9249 /* Return true if it is appropriate to emit `ret' instructions in the
9250 body of a function. Do this only if the epilogue is simple, needing a
9251 couple of insns. Prior to reloading, we can't tell how many registers
9252 must be saved, so return false then. Return false if there is no frame
9253 marker to de-allocate. */
9255 bool
9256 ix86_can_use_return_insn_p (void)
9258 struct ix86_frame frame;
9260 if (! reload_completed || frame_pointer_needed)
9261 return 0;
9263 /* Don't allow more than 32k pop, since that's all we can do
9264 with one instruction. */
9265 if (crtl->args.pops_args && crtl->args.size >= 32768)
9266 return 0;
9268 ix86_compute_frame_layout (&frame);
9269 return (frame.stack_pointer_offset == UNITS_PER_WORD
9270 && (frame.nregs + frame.nsseregs) == 0);
9273 /* Value should be nonzero if functions must have frame pointers.
9274 Zero means the frame pointer need not be set up (and parms may
9275 be accessed via the stack pointer) in functions that seem suitable. */
9277 static bool
9278 ix86_frame_pointer_required (void)
9280 /* If we accessed previous frames, then the generated code expects
9281 to be able to access the saved ebp value in our frame. */
9282 if (cfun->machine->accesses_prev_frame)
9283 return true;
9285 /* Several x86 OSes need a frame pointer for other reasons,
9286 usually pertaining to setjmp. */
9287 if (SUBTARGET_FRAME_POINTER_REQUIRED)
9288 return true;
9290 /* For older 32-bit runtimes setjmp requires valid frame-pointer. */
9291 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
9292 return true;
9294 /* For Win64 SEH, very large frames need a frame pointer, since the
9295 maximum stack allocation is 4GB. */
9296 if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
9297 return true;
9299 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
9300 turns off the frame pointer by default. Turn it back on now if
9301 we've not got a leaf function. */
9302 if (TARGET_OMIT_LEAF_FRAME_POINTER
9303 && (!crtl->is_leaf
9304 || ix86_current_function_calls_tls_descriptor))
9305 return true;
9307 if (crtl->profile && !flag_fentry)
9308 return true;
9310 return false;
9313 /* Record that the current function accesses previous call frames. */
9315 void
9316 ix86_setup_frame_addresses (void)
9318 cfun->machine->accesses_prev_frame = 1;
9321 #ifndef USE_HIDDEN_LINKONCE
9322 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
9323 # define USE_HIDDEN_LINKONCE 1
9324 # else
9325 # define USE_HIDDEN_LINKONCE 0
9326 # endif
9327 #endif
9329 static int pic_labels_used;
9331 /* Fills in the label name that should be used for a pc thunk for
9332 the given register. */
9334 static void
9335 get_pc_thunk_name (char name[32], unsigned int regno)
9337 gcc_assert (!TARGET_64BIT);
9339 if (USE_HIDDEN_LINKONCE)
9340 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
9341 else
9342 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
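/* For example, with USE_HIDDEN_LINKONCE and regno == BX_REG this yields
   "__x86.get_pc_thunk.bx"; otherwise an internal label of the "LPR"
   family is generated (its exact spelling depends on
   ASM_GENERATE_INTERNAL_LABEL for the target).  */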
9346 /* This function generates the pc thunks used for -fpic: each thunk loads
9347 its register with the return address of the caller and then returns. */
9349 static void
9350 ix86_code_end (void)
9352 rtx xops[2];
9353 int regno;
9355 for (regno = AX_REG; regno <= SP_REG; regno++)
9357 char name[32];
9358 tree decl;
9360 if (!(pic_labels_used & (1 << regno)))
9361 continue;
9363 get_pc_thunk_name (name, regno);
9365 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
9366 get_identifier (name),
9367 build_function_type_list (void_type_node, NULL_TREE));
9368 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
9369 NULL_TREE, void_type_node);
9370 TREE_PUBLIC (decl) = 1;
9371 TREE_STATIC (decl) = 1;
9372 DECL_IGNORED_P (decl) = 1;
9374 #if TARGET_MACHO
9375 if (TARGET_MACHO)
9377 switch_to_section (darwin_sections[text_coal_section]);
9378 fputs ("\t.weak_definition\t", asm_out_file);
9379 assemble_name (asm_out_file, name);
9380 fputs ("\n\t.private_extern\t", asm_out_file);
9381 assemble_name (asm_out_file, name);
9382 putc ('\n', asm_out_file);
9383 ASM_OUTPUT_LABEL (asm_out_file, name);
9384 DECL_WEAK (decl) = 1;
9386 else
9387 #endif
9388 if (USE_HIDDEN_LINKONCE)
9390 DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
9392 targetm.asm_out.unique_section (decl, 0);
9393 switch_to_section (get_named_section (decl, NULL, 0));
9395 targetm.asm_out.globalize_label (asm_out_file, name);
9396 fputs ("\t.hidden\t", asm_out_file);
9397 assemble_name (asm_out_file, name);
9398 putc ('\n', asm_out_file);
9399 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
9401 else
9403 switch_to_section (text_section);
9404 ASM_OUTPUT_LABEL (asm_out_file, name);
9407 DECL_INITIAL (decl) = make_node (BLOCK);
9408 current_function_decl = decl;
9409 init_function_start (decl);
9410 first_function_block_is_cold = false;
9411 /* Make sure unwind info is emitted for the thunk if needed. */
9412 final_start_function (emit_barrier (), asm_out_file, 1);
9414 /* Pad stack IP move with 4 instructions (two NOPs count
9415 as one instruction). */
9416 if (TARGET_PAD_SHORT_FUNCTION)
9418 int i = 8;
9420 while (i--)
9421 fputs ("\tnop\n", asm_out_file);
9424 xops[0] = gen_rtx_REG (Pmode, regno);
9425 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
9426 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
9427 output_asm_insn ("%!ret", NULL);
9428 final_end_function ();
9429 init_insn_lengths ();
9430 free_after_compilation (cfun);
9431 set_cfun (NULL);
9432 current_function_decl = NULL;
9435 if (flag_split_stack)
9436 file_end_indicate_split_stack ();
9439 /* Emit code for the SET_GOT patterns. */
9441 const char *
9442 output_set_got (rtx dest, rtx label)
9444 rtx xops[3];
9446 xops[0] = dest;
9448 if (TARGET_VXWORKS_RTP && flag_pic)
9450 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
9451 xops[2] = gen_rtx_MEM (Pmode,
9452 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
9453 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
9455 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
9456 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
9457 an unadorned address. */
9458 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
9459 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
9460 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
9461 return "";
9464 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
9466 if (!flag_pic)
9468 if (TARGET_MACHO)
9469 /* We don't need a pic base, we're not producing pic. */
9470 gcc_unreachable ();
9472 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
9473 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
9474 targetm.asm_out.internal_label (asm_out_file, "L",
9475 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
9477 else
9479 char name[32];
9480 get_pc_thunk_name (name, REGNO (dest));
9481 pic_labels_used |= 1 << REGNO (dest);
9483 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
9484 xops[2] = gen_rtx_MEM (QImode, xops[2]);
9485 output_asm_insn ("%!call\t%X2", xops);
9487 #if TARGET_MACHO
9488 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
9489 This is what will be referenced by the Mach-O PIC subsystem. */
9490 if (machopic_should_output_picbase_label () || !label)
9491 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
9493 /* When we are restoring the pic base at the site of a nonlocal label,
9494 and we decided to emit the pic base above, we will still output a
9495 local label used for calculating the correction offset (even though
9496 the offset will be 0 in that case). */
9497 if (label)
9498 targetm.asm_out.internal_label (asm_out_file, "L",
9499 CODE_LABEL_NUMBER (label));
9500 #endif
9503 if (!TARGET_MACHO)
9504 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
9506 return "";
9509 /* Generate an "push" pattern for input ARG. */
9511 static rtx
9512 gen_push (rtx arg)
9514 struct machine_function *m = cfun->machine;
9516 if (m->fs.cfa_reg == stack_pointer_rtx)
9517 m->fs.cfa_offset += UNITS_PER_WORD;
9518 m->fs.sp_offset += UNITS_PER_WORD;
9520 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9521 arg = gen_rtx_REG (word_mode, REGNO (arg));
9523 return gen_rtx_SET (VOIDmode,
9524 gen_rtx_MEM (word_mode,
9525 gen_rtx_PRE_DEC (Pmode,
9526 stack_pointer_rtx)),
9527 arg);
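/* For illustration, on x86-64 gen_push (di) builds the un-emitted RTL

     (set (mem:DI (pre_dec:DI (reg:DI sp))) (reg:DI di))

   and bumps the tracked CFA/SP offsets by UNITS_PER_WORD; the caller is
   responsible for emitting it (and marking it frame-related in the
   prologue).  */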
9530 /* Generate an "pop" pattern for input ARG. */
9532 static rtx
9533 gen_pop (rtx arg)
9535 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9536 arg = gen_rtx_REG (word_mode, REGNO (arg));
9538 return gen_rtx_SET (VOIDmode,
9539 arg,
9540 gen_rtx_MEM (word_mode,
9541 gen_rtx_POST_INC (Pmode,
9542 stack_pointer_rtx)));
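/* Likewise, gen_pop (di) on x86-64 builds

     (set (reg:DI di) (mem:DI (post_inc:DI (reg:DI sp))))

   but, unlike gen_push, does not update the cfun->machine->fs offsets;
   callers account for the stack adjustment themselves.  */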
9545 /* Return the number of an unused call-clobbered register that is available
9546 for the entire function, or INVALID_REGNUM if there is none. */
9548 static unsigned int
9549 ix86_select_alt_pic_regnum (void)
9551 if (crtl->is_leaf
9552 && !crtl->profile
9553 && !ix86_current_function_calls_tls_descriptor)
9555 int i, drap;
9556 /* Can't use the same register for both PIC and DRAP. */
9557 if (crtl->drap_reg)
9558 drap = REGNO (crtl->drap_reg);
9559 else
9560 drap = -1;
9561 for (i = 2; i >= 0; --i)
9562 if (i != drap && !df_regs_ever_live_p (i))
9563 return i;
9566 return INVALID_REGNUM;
9569 /* Return TRUE if we need to save REGNO. */
9571 static bool
9572 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
9574 if (pic_offset_table_rtx
9575 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
9576 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9577 || crtl->profile
9578 || crtl->calls_eh_return
9579 || crtl->uses_const_pool
9580 || cfun->has_nonlocal_label))
9581 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
9583 if (crtl->calls_eh_return && maybe_eh_return)
9585 unsigned i;
9586 for (i = 0; ; i++)
9588 unsigned test = EH_RETURN_DATA_REGNO (i);
9589 if (test == INVALID_REGNUM)
9590 break;
9591 if (test == regno)
9592 return true;
9596 if (crtl->drap_reg
9597 && regno == REGNO (crtl->drap_reg)
9598 && !cfun->machine->no_drap_save_restore)
9599 return true;
9601 return (df_regs_ever_live_p (regno)
9602 && !call_used_regs[regno]
9603 && !fixed_regs[regno]
9604 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
9607 /* Return number of saved general purpose registers. */
9609 static int
9610 ix86_nsaved_regs (void)
9612 int nregs = 0;
9613 int regno;
9615 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9616 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9617 nregs ++;
9618 return nregs;
9621 /* Return number of saved SSE registers. */
9623 static int
9624 ix86_nsaved_sseregs (void)
9626 int nregs = 0;
9627 int regno;
9629 if (!TARGET_64BIT_MS_ABI)
9630 return 0;
9631 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9632 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9633 nregs ++;
9634 return nregs;
9637 /* Given FROM and TO register numbers, say whether this elimination is
9638 allowed. If stack alignment is needed, we can only replace argument
9639 pointer with hard frame pointer, or replace frame pointer with stack
9640 pointer. Otherwise, frame pointer elimination is automatically
9641 handled and all other eliminations are valid. */
9643 static bool
9644 ix86_can_eliminate (const int from, const int to)
9646 if (stack_realign_fp)
9647 return ((from == ARG_POINTER_REGNUM
9648 && to == HARD_FRAME_POINTER_REGNUM)
9649 || (from == FRAME_POINTER_REGNUM
9650 && to == STACK_POINTER_REGNUM));
9651 else
9652 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
9655 /* Return the offset between two registers, one to be eliminated, and the other
9656 its replacement, at the start of a routine. */
9658 HOST_WIDE_INT
9659 ix86_initial_elimination_offset (int from, int to)
9661 struct ix86_frame frame;
9662 ix86_compute_frame_layout (&frame);
9664 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9665 return frame.hard_frame_pointer_offset;
9666 else if (from == FRAME_POINTER_REGNUM
9667 && to == HARD_FRAME_POINTER_REGNUM)
9668 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
9669 else
9671 gcc_assert (to == STACK_POINTER_REGNUM);
9673 if (from == ARG_POINTER_REGNUM)
9674 return frame.stack_pointer_offset;
9676 gcc_assert (from == FRAME_POINTER_REGNUM);
9677 return frame.stack_pointer_offset - frame.frame_pointer_offset;
9681 /* In a dynamically-aligned function, we can't know the offset from
9682 stack pointer to frame pointer, so we must ensure that setjmp
9683 eliminates fp against the hard fp (%ebp) rather than trying to
9684 index from %esp up to the top of the frame across a gap that is
9685 of unknown (at compile-time) size. */
9686 static rtx
9687 ix86_builtin_setjmp_frame_value (void)
9689 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
9692 /* When using -fsplit-stack, the allocation routines set a field in
9693 the TCB to the bottom of the stack plus this much space, measured
9694 in bytes. */
9696 #define SPLIT_STACK_AVAILABLE 256
9698 /* Fill the ix86_frame structure FRAME for the currently compiled function. */
9700 static void
9701 ix86_compute_frame_layout (struct ix86_frame *frame)
9703 unsigned HOST_WIDE_INT stack_alignment_needed;
9704 HOST_WIDE_INT offset;
9705 unsigned HOST_WIDE_INT preferred_alignment;
9706 HOST_WIDE_INT size = get_frame_size ();
9707 HOST_WIDE_INT to_allocate;
9709 frame->nregs = ix86_nsaved_regs ();
9710 frame->nsseregs = ix86_nsaved_sseregs ();
9712 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
9713 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
9715 /* The 64-bit MS ABI seems to require the stack alignment to always be 16,
9716 except in function prologues and in leaf functions. */
9717 if ((TARGET_64BIT_MS_ABI && preferred_alignment < 16)
9718 && (!crtl->is_leaf || cfun->calls_alloca != 0
9719 || ix86_current_function_calls_tls_descriptor))
9721 preferred_alignment = 16;
9722 stack_alignment_needed = 16;
9723 crtl->preferred_stack_boundary = 128;
9724 crtl->stack_alignment_needed = 128;
9727 gcc_assert (!size || stack_alignment_needed);
9728 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
9729 gcc_assert (preferred_alignment <= stack_alignment_needed);
9731 /* For SEH we have to limit the amount of code movement into the prologue.
9732 At present we do this via a BLOCKAGE, at which point there's very little
9733 scheduling that can be done, which means that there's very little point
9734 in doing anything except PUSHs. */
9735 if (TARGET_SEH)
9736 cfun->machine->use_fast_prologue_epilogue = false;
9738 /* During reload iterations the number of saved registers can change.
9739 Recompute the value as needed. Do not recompute when the number of
9740 registers did not change, as reload calls this function multiple times
9741 and does not expect the decision to change within a single iteration. */
9742 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun))
9743 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
9745 int count = frame->nregs;
9746 struct cgraph_node *node = cgraph_get_node (current_function_decl);
9748 cfun->machine->use_fast_prologue_epilogue_nregs = count;
9750 /* The fast prologue uses move instead of push to save registers. This
9751 is significantly longer, but also executes faster as modern hardware
9752 can execute the moves in parallel, but can't do that for push/pop.
9754 Be careful about choosing which prologue to emit: when the function
9755 takes many instructions to execute, we may use the slow version, as
9756 well as when the function is known to be outside a hot spot (this is
9757 known only with profile feedback). Weight the size of the function by
9758 the number of registers to save, as it is cheap to use one or two push
9759 instructions but very slow to use many of them. */
9760 if (count)
9761 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
9762 if (node->frequency < NODE_FREQUENCY_NORMAL
9763 || (flag_branch_probabilities
9764 && node->frequency < NODE_FREQUENCY_HOT))
9765 cfun->machine->use_fast_prologue_epilogue = false;
9766 else
9767 cfun->machine->use_fast_prologue_epilogue
9768 = !expensive_function_p (count);
9771 frame->save_regs_using_mov
9772 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
9773 /* If static stack checking is enabled and done with probes,
9774 the registers need to be saved before allocating the frame. */
9775 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
9777 /* Skip return address. */
9778 offset = UNITS_PER_WORD;
9780 /* Skip pushed static chain. */
9781 if (ix86_static_chain_on_stack)
9782 offset += UNITS_PER_WORD;
9784 /* Skip saved base pointer. */
9785 if (frame_pointer_needed)
9786 offset += UNITS_PER_WORD;
9787 frame->hfp_save_offset = offset;
9789 /* The traditional frame pointer location is at the top of the frame. */
9790 frame->hard_frame_pointer_offset = offset;
9792 /* Register save area */
9793 offset += frame->nregs * UNITS_PER_WORD;
9794 frame->reg_save_offset = offset;
9796 /* On SEH target, registers are pushed just before the frame pointer
9797 location. */
9798 if (TARGET_SEH)
9799 frame->hard_frame_pointer_offset = offset;
9801 /* Align and set SSE register save area. */
9802 if (frame->nsseregs)
9804 /* The only ABI that has saved SSE registers (Win64) also has a
9805 16-byte aligned default stack, and thus we don't need to be
9806 within the re-aligned local stack frame to save them. */
9807 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
9808 offset = (offset + 16 - 1) & -16;
9809 offset += frame->nsseregs * 16;
9811 frame->sse_reg_save_offset = offset;
9813 /* The re-aligned stack starts here. Values before this point are not
9814 directly comparable with values below this point. In order to make
9815 sure that no value happens to be the same before and after, force
9816 the alignment computation below to add a non-zero value. */
9817 if (stack_realign_fp)
9818 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
9820 /* Va-arg area */
9821 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
9822 offset += frame->va_arg_size;
9824 /* Align start of frame for local function. */
9825 if (stack_realign_fp
9826 || offset != frame->sse_reg_save_offset
9827 || size != 0
9828 || !crtl->is_leaf
9829 || cfun->calls_alloca
9830 || ix86_current_function_calls_tls_descriptor)
9831 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
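/* The expression above is the standard round-up: e.g. with offset == 44
   and stack_alignment_needed == 16, (44 + 15) & -16 == 48.  */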
9833 /* Frame pointer points here. */
9834 frame->frame_pointer_offset = offset;
9836 offset += size;
9838 /* Add the outgoing arguments area. It can be skipped if we eliminated
9839 all the function calls as dead code.
9840 Skipping is however impossible when the function calls alloca. The
9841 alloca expander assumes that the last crtl->outgoing_args_size bytes
9842 of the stack frame are unused. */
9843 if (ACCUMULATE_OUTGOING_ARGS
9844 && (!crtl->is_leaf || cfun->calls_alloca
9845 || ix86_current_function_calls_tls_descriptor))
9847 offset += crtl->outgoing_args_size;
9848 frame->outgoing_arguments_size = crtl->outgoing_args_size;
9850 else
9851 frame->outgoing_arguments_size = 0;
9853 /* Align stack boundary. Only needed if we're calling another function
9854 or using alloca. */
9855 if (!crtl->is_leaf || cfun->calls_alloca
9856 || ix86_current_function_calls_tls_descriptor)
9857 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
9859 /* We've reached end of stack frame. */
9860 frame->stack_pointer_offset = offset;
9862 /* Size prologue needs to allocate. */
9863 to_allocate = offset - frame->sse_reg_save_offset;
9865 if ((!to_allocate && frame->nregs <= 1)
9866 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
9867 frame->save_regs_using_mov = false;
9869 if (ix86_using_red_zone ()
9870 && crtl->sp_is_unchanging
9871 && crtl->is_leaf
9872 && !ix86_current_function_calls_tls_descriptor)
9874 frame->red_zone_size = to_allocate;
9875 if (frame->save_regs_using_mov)
9876 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
9877 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
9878 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
9880 else
9881 frame->red_zone_size = 0;
9882 frame->stack_pointer_offset -= frame->red_zone_size;
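/* For reference: the red zone used above is the 128 bytes below the
   stack pointer that the x86-64 SysV ABI guarantees signal and
   interrupt handlers will not clobber, so small leaf frames can live
   there without any stack-pointer adjustment; the RED_ZONE_RESERVE
   slice is kept free rather than handed out to register saves.  */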
9884 /* The SEH frame pointer location is near the bottom of the frame.
9885 This is enforced by the fact that the difference between the
9886 stack pointer and the frame pointer is limited to 240 bytes in
9887 the unwind data structure. */
9888 if (TARGET_SEH)
9890 HOST_WIDE_INT diff;
9892 /* If we can leave the frame pointer where it is, do so. Also, this
9893 returns the establisher frame for __builtin_frame_address (0). */
9894 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
9895 if (diff <= SEH_MAX_FRAME_SIZE
9896 && (diff > 240 || (diff & 15) != 0)
9897 && !crtl->accesses_prior_frames)
9899 /* Ideally we'd determine what portion of the local stack frame
9900 (within the constraint of the lowest 240) is most heavily used.
9901 But without that complication, simply bias the frame pointer
9902 by 128 bytes so as to maximize the amount of the local stack
9903 frame that is addressable with 8-bit offsets. */
9904 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
9909 /* This is semi-inlined memory_address_length, but simplified
9910 since we know that we're always dealing with reg+offset, and
9911 to avoid having to create and discard all that rtl. */
9913 static inline int
9914 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
9916 int len = 4;
9918 if (offset == 0)
9920 /* EBP and R13 cannot be encoded without an offset. */
9921 len = (regno == BP_REG || regno == R13_REG);
9923 else if (IN_RANGE (offset, -128, 127))
9924 len = 1;
9926 /* ESP and R12 must be encoded with a SIB byte. */
9927 if (regno == SP_REG || regno == R12_REG)
9928 len++;
9930 return len;
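/* A few illustrative values (displacement/SIB bytes only, not the whole
   instruction):
     (BP_REG, 0)    -> 1   EBP always needs at least a disp8, even of 0
     (SP_REG, 0)    -> 1   no displacement, but ESP requires a SIB byte
     (AX_REG, -64)  -> 1   fits in a signed 8-bit displacement
     (SP_REG, 200)  -> 5   32-bit displacement plus the SIB byte  */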
9933 /* Return an RTX that points to CFA_OFFSET within the stack frame.
9934 The valid base registers are taken from CFUN->MACHINE->FS. */
9936 static rtx
9937 choose_baseaddr (HOST_WIDE_INT cfa_offset)
9939 const struct machine_function *m = cfun->machine;
9940 rtx base_reg = NULL;
9941 HOST_WIDE_INT base_offset = 0;
9943 if (m->use_fast_prologue_epilogue)
9945 /* Choose the base register most likely to allow the most scheduling
9946 opportunities. Generally FP is valid throughout the function,
9947 while DRAP must be reloaded within the epilogue. But choose either
9948 over the SP due to increased encoding size. */
9950 if (m->fs.fp_valid)
9952 base_reg = hard_frame_pointer_rtx;
9953 base_offset = m->fs.fp_offset - cfa_offset;
9955 else if (m->fs.drap_valid)
9957 base_reg = crtl->drap_reg;
9958 base_offset = 0 - cfa_offset;
9960 else if (m->fs.sp_valid)
9962 base_reg = stack_pointer_rtx;
9963 base_offset = m->fs.sp_offset - cfa_offset;
9966 else
9968 HOST_WIDE_INT toffset;
9969 int len = 16, tlen;
9971 /* Choose the base register with the smallest address encoding.
9972 With a tie, choose FP > DRAP > SP. */
9973 if (m->fs.sp_valid)
9975 base_reg = stack_pointer_rtx;
9976 base_offset = m->fs.sp_offset - cfa_offset;
9977 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
9979 if (m->fs.drap_valid)
9981 toffset = 0 - cfa_offset;
9982 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
9983 if (tlen <= len)
9985 base_reg = crtl->drap_reg;
9986 base_offset = toffset;
9987 len = tlen;
9990 if (m->fs.fp_valid)
9992 toffset = m->fs.fp_offset - cfa_offset;
9993 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
9994 if (tlen <= len)
9996 base_reg = hard_frame_pointer_rtx;
9997 base_offset = toffset;
9998 len = tlen;
10002 gcc_assert (base_reg != NULL);
10004 return plus_constant (Pmode, base_reg, base_offset);
10007 /* Emit code to save registers in the prologue. */
10009 static void
10010 ix86_emit_save_regs (void)
10012 unsigned int regno;
10013 rtx insn;
10015 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
10016 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10018 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
10019 RTX_FRAME_RELATED_P (insn) = 1;
10023 /* Emit a single register save at CFA - CFA_OFFSET. */
10025 static void
10026 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
10027 HOST_WIDE_INT cfa_offset)
10029 struct machine_function *m = cfun->machine;
10030 rtx reg = gen_rtx_REG (mode, regno);
10031 rtx mem, addr, base, insn;
10033 addr = choose_baseaddr (cfa_offset);
10034 mem = gen_frame_mem (mode, addr);
10036 /* For SSE saves, we need to indicate the 128-bit alignment. */
10037 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
10039 insn = emit_move_insn (mem, reg);
10040 RTX_FRAME_RELATED_P (insn) = 1;
10042 base = addr;
10043 if (GET_CODE (base) == PLUS)
10044 base = XEXP (base, 0);
10045 gcc_checking_assert (REG_P (base));
10047 /* When saving registers into a re-aligned local stack frame, avoid
10048 any tricky guessing by dwarf2out. */
10049 if (m->fs.realigned)
10051 gcc_checking_assert (stack_realign_drap);
10053 if (regno == REGNO (crtl->drap_reg))
10055 /* A bit of a hack. We force the DRAP register to be saved in
10056 the re-aligned stack frame, which provides us with a copy
10057 of the CFA that will last past the prologue. Install it. */
10058 gcc_checking_assert (cfun->machine->fs.fp_valid);
10059 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
10060 cfun->machine->fs.fp_offset - cfa_offset);
10061 mem = gen_rtx_MEM (mode, addr);
10062 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
10064 else
10066 /* The frame pointer is a stable reference within the
10067 aligned frame. Use it. */
10068 gcc_checking_assert (cfun->machine->fs.fp_valid);
10069 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
10070 cfun->machine->fs.fp_offset - cfa_offset);
10071 mem = gen_rtx_MEM (mode, addr);
10072 add_reg_note (insn, REG_CFA_EXPRESSION,
10073 gen_rtx_SET (VOIDmode, mem, reg));
10077 /* The memory may not be relative to the current CFA register,
10078 which means that we may need to generate a new pattern for
10079 use by the unwind info. */
10080 else if (base != m->fs.cfa_reg)
10082 addr = plus_constant (Pmode, m->fs.cfa_reg,
10083 m->fs.cfa_offset - cfa_offset);
10084 mem = gen_rtx_MEM (mode, addr);
10085 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
10089 /* Emit code to save registers using MOV insns.
10090 First register is stored at CFA - CFA_OFFSET. */
10091 static void
10092 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
10094 unsigned int regno;
10096 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10097 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10099 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
10100 cfa_offset -= UNITS_PER_WORD;
10104 /* Emit code to save SSE registers using MOV insns.
10105 First register is stored at CFA - CFA_OFFSET. */
10106 static void
10107 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
10109 unsigned int regno;
10111 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10112 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10114 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
10115 cfa_offset -= 16;
10119 static GTY(()) rtx queued_cfa_restores;
10121 /* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
10122 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
10123 Don't add the note if the previously saved value will be left untouched
10124 within stack red-zone till return, as unwinders can find the same value
10125 in the register and on the stack. */
10127 static void
10128 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
10130 if (!crtl->shrink_wrapped
10131 && cfa_offset <= cfun->machine->fs.red_zone_offset)
10132 return;
10134 if (insn)
10136 add_reg_note (insn, REG_CFA_RESTORE, reg);
10137 RTX_FRAME_RELATED_P (insn) = 1;
10139 else
10140 queued_cfa_restores
10141 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
10144 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
10146 static void
10147 ix86_add_queued_cfa_restore_notes (rtx insn)
10149 rtx last;
10150 if (!queued_cfa_restores)
10151 return;
10152 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
    ; /* Walk to the last queued restore note. */
10154 XEXP (last, 1) = REG_NOTES (insn);
10155 REG_NOTES (insn) = queued_cfa_restores;
10156 queued_cfa_restores = NULL_RTX;
10157 RTX_FRAME_RELATED_P (insn) = 1;
10160 /* Expand prologue or epilogue stack adjustment.
10161 The pattern exists to put a dependency on all ebp-based memory accesses.
10162 STYLE should be negative if instructions should be marked as frame related,
10163 zero if the %r11 register is live and cannot be freely used, and positive
10164 otherwise. */
10166 static void
10167 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
10168 int style, bool set_cfa)
10170 struct machine_function *m = cfun->machine;
10171 rtx insn;
10172 bool add_frame_related_expr = false;
10174 if (Pmode == SImode)
10175 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
10176 else if (x86_64_immediate_operand (offset, DImode))
10177 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
10178 else
10180 rtx tmp;
10181 /* r11 is used by indirect sibcall return as well, set before the
10182 epilogue and used after the epilogue. */
10183 if (style)
10184 tmp = gen_rtx_REG (DImode, R11_REG);
10185 else
10187 gcc_assert (src != hard_frame_pointer_rtx
10188 && dest != hard_frame_pointer_rtx);
10189 tmp = hard_frame_pointer_rtx;
10191 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
10192 if (style < 0)
10193 add_frame_related_expr = true;
10195 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
10198 insn = emit_insn (insn);
10199 if (style >= 0)
10200 ix86_add_queued_cfa_restore_notes (insn);
10202 if (set_cfa)
10204 rtx r;
10206 gcc_assert (m->fs.cfa_reg == src);
10207 m->fs.cfa_offset += INTVAL (offset);
10208 m->fs.cfa_reg = dest;
10210 r = gen_rtx_PLUS (Pmode, src, offset);
10211 r = gen_rtx_SET (VOIDmode, dest, r);
10212 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
10213 RTX_FRAME_RELATED_P (insn) = 1;
10215 else if (style < 0)
10217 RTX_FRAME_RELATED_P (insn) = 1;
10218 if (add_frame_related_expr)
10220 rtx r = gen_rtx_PLUS (Pmode, src, offset);
10221 r = gen_rtx_SET (VOIDmode, dest, r);
10222 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
10226 if (dest == stack_pointer_rtx)
10228 HOST_WIDE_INT ooffset = m->fs.sp_offset;
10229 bool valid = m->fs.sp_valid;
10231 if (src == hard_frame_pointer_rtx)
10233 valid = m->fs.fp_valid;
10234 ooffset = m->fs.fp_offset;
10236 else if (src == crtl->drap_reg)
10238 valid = m->fs.drap_valid;
10239 ooffset = 0;
10241 else
10243 /* Otherwise there are two possibilities: SP itself, which we set
10244 up as the default above, or EH_RETURN_STACKADJ_RTX, which is
10245 taken care of by hand along the eh_return path. */
10246 gcc_checking_assert (src == stack_pointer_rtx
10247 || offset == const0_rtx);
10250 m->fs.sp_offset = ooffset - INTVAL (offset);
10251 m->fs.sp_valid = valid;
10255 /* Find an available register to be used as the dynamic realign argument
10256 pointer register. Such a register will be written in the prologue and
10257 used at the beginning of the body, so it must not be
10258 1. a parameter passing register.
10259 2. the GOT pointer.
10260 We reuse the static-chain register if it is available. Otherwise, we
10261 use DI for i386 and R13 for x86-64. We chose R13 since it has a
10262 shorter encoding.
10264 Return: the regno of the chosen register. */
10266 static unsigned int
10267 find_drap_reg (void)
10269 tree decl = cfun->decl;
10271 if (TARGET_64BIT)
10273 /* Use R13 for a nested function or a function that needs a static chain.
10274 Since a function with a tail call may use any caller-saved
10275 register in the epilogue, DRAP must not use a caller-saved
10276 register in that case. */
10277 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10278 return R13_REG;
10280 return R10_REG;
10282 else
10284 /* Use DI for a nested function or a function that needs a static chain.
10285 Since a function with a tail call may use any caller-saved
10286 register in the epilogue, DRAP must not use a caller-saved
10287 register in that case. */
10288 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10289 return DI_REG;
10291 /* Reuse static chain register if it isn't used for parameter
10292 passing. */
10293 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
10295 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
10296 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
10297 return CX_REG;
10299 return DI_REG;
10303 /* Return minimum incoming stack alignment. */
10305 static unsigned int
10306 ix86_minimum_incoming_stack_boundary (bool sibcall)
10308 unsigned int incoming_stack_boundary;
10310 /* Prefer the one specified at command line. */
10311 if (ix86_user_incoming_stack_boundary)
10312 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
10313 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
10314 when -mstackrealign is used, this is not a sibcall check, and the
10315 estimated stack alignment is 128 bits. */
10316 else if (!sibcall
10317 && !TARGET_64BIT
10318 && ix86_force_align_arg_pointer
10319 && crtl->stack_alignment_estimated == 128)
10320 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10321 else
10322 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
10324 /* Incoming stack alignment can be changed on individual functions
10325 via force_align_arg_pointer attribute. We use the smallest
10326 incoming stack boundary. */
10327 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
10328 && lookup_attribute (ix86_force_align_arg_pointer_string,
10329 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
10330 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10332 /* The incoming stack frame has to be aligned at least at
10333 parm_stack_boundary. */
10334 if (incoming_stack_boundary < crtl->parm_stack_boundary)
10335 incoming_stack_boundary = crtl->parm_stack_boundary;
10337 /* The stack at the entrance of main is aligned by the runtime. We use
10338 the smallest incoming stack boundary. */
10339 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
10340 && DECL_NAME (current_function_decl)
10341 && MAIN_NAME_P (DECL_NAME (current_function_decl))
10342 && DECL_FILE_SCOPE_P (current_function_decl))
10343 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
10345 return incoming_stack_boundary;
10348 /* Update incoming stack boundary and estimated stack alignment. */
10350 static void
10351 ix86_update_stack_boundary (void)
10353 ix86_incoming_stack_boundary
10354 = ix86_minimum_incoming_stack_boundary (false);
10356 /* x86_64 varargs need 16-byte stack alignment for the register save
10357 area. */
10358 if (TARGET_64BIT
10359 && cfun->stdarg
10360 && crtl->stack_alignment_estimated < 128)
10361 crtl->stack_alignment_estimated = 128;
10364 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
10365 needed or an rtx for DRAP otherwise. */
10367 static rtx
10368 ix86_get_drap_rtx (void)
10370 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
10371 crtl->need_drap = true;
10373 if (stack_realign_drap)
10375 /* Assign DRAP to vDRAP and return vDRAP. */
10376 unsigned int regno = find_drap_reg ();
10377 rtx drap_vreg;
10378 rtx arg_ptr;
10379 rtx seq, insn;
10381 arg_ptr = gen_rtx_REG (Pmode, regno);
10382 crtl->drap_reg = arg_ptr;
10384 start_sequence ();
10385 drap_vreg = copy_to_reg (arg_ptr);
10386 seq = get_insns ();
10387 end_sequence ();
10389 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
10390 if (!optimize)
10392 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
10393 RTX_FRAME_RELATED_P (insn) = 1;
10395 return drap_vreg;
10397 else
10398 return NULL;
10401 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
10403 static rtx
10404 ix86_internal_arg_pointer (void)
10406 return virtual_incoming_args_rtx;
10409 struct scratch_reg {
10410 rtx reg;
10411 bool saved;
10414 /* Return a short-lived scratch register for use on function entry.
10415 In 32-bit mode, it is valid only after the registers are saved
10416 in the prologue. This register must be released by means of
10417 release_scratch_register_on_entry once it is dead. */
10419 static void
10420 get_scratch_register_on_entry (struct scratch_reg *sr)
10422 int regno;
10424 sr->saved = false;
10426 if (TARGET_64BIT)
10428 /* We always use R11 in 64-bit mode. */
10429 regno = R11_REG;
10431 else
10433 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
10434 bool fastcall_p
10435 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10436 bool thiscall_p
10437 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10438 bool static_chain_p = DECL_STATIC_CHAIN (decl);
10439 int regparm = ix86_function_regparm (fntype, decl);
10440 int drap_regno
10441 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
10443 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
10444 for the static chain register. */
10445 if ((regparm < 1 || (fastcall_p && !static_chain_p))
10446 && drap_regno != AX_REG)
10447 regno = AX_REG;
10448 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
10449 for the static chain register. */
10450 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
10451 regno = AX_REG;
10452 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
10453 regno = DX_REG;
10454 /* ecx is the static chain register. */
10455 else if (regparm < 3 && !fastcall_p && !thiscall_p
10456 && !static_chain_p
10457 && drap_regno != CX_REG)
10458 regno = CX_REG;
10459 else if (ix86_save_reg (BX_REG, true))
10460 regno = BX_REG;
10461 /* esi is the static chain register. */
10462 else if (!(regparm == 3 && static_chain_p)
10463 && ix86_save_reg (SI_REG, true))
10464 regno = SI_REG;
10465 else if (ix86_save_reg (DI_REG, true))
10466 regno = DI_REG;
10467 else
10469 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
10470 sr->saved = true;
10474 sr->reg = gen_rtx_REG (Pmode, regno);
10475 if (sr->saved)
10477 rtx insn = emit_insn (gen_push (sr->reg));
10478 RTX_FRAME_RELATED_P (insn) = 1;
10482 /* Release a scratch register obtained from the preceding function. */
10484 static void
10485 release_scratch_register_on_entry (struct scratch_reg *sr)
10487 if (sr->saved)
10489 struct machine_function *m = cfun->machine;
10490 rtx x, insn = emit_insn (gen_pop (sr->reg));
10492 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
10493 RTX_FRAME_RELATED_P (insn) = 1;
10494 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
10495 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10496 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
10497 m->fs.sp_offset -= UNITS_PER_WORD;
10501 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
10503 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
10505 static void
10506 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
10508 /* We skip the probe for the first interval + a small dope of 4 words and
10509 probe that many bytes past the specified size to maintain a protection
10510 area at the bottom of the stack. */
10511 const int dope = 4 * UNITS_PER_WORD;
10512 rtx size_rtx = GEN_INT (size), last;
10514 /* See if we have a constant small number of probes to generate. If so,
10515 that's the easy case. The run-time loop is made up of 11 insns in the
10516 generic case while the compile-time loop is made up of 3+2*(n-1) insns
10517 for n # of intervals. */
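/* For example, with a 4096-byte probe interval the unrolled code below
   handles sizes up to 5 * 4096 = 20480 bytes; larger sizes fall through
   to the loop in the else branch.  */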
10518 if (size <= 5 * PROBE_INTERVAL)
10520 HOST_WIDE_INT i, adjust;
10521 bool first_probe = true;
10523 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
10524 values of N from 1 until it exceeds SIZE. If only one probe is
10525 needed, this will not generate any code. Then adjust and probe
10526 to PROBE_INTERVAL + SIZE. */
10527 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10529 if (first_probe)
10531 adjust = 2 * PROBE_INTERVAL + dope;
10532 first_probe = false;
10534 else
10535 adjust = PROBE_INTERVAL;
10537 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10538 plus_constant (Pmode, stack_pointer_rtx,
10539 -adjust)));
10540 emit_stack_probe (stack_pointer_rtx);
10543 if (first_probe)
10544 adjust = size + PROBE_INTERVAL + dope;
10545 else
10546 adjust = size + PROBE_INTERVAL - i;
10548 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10549 plus_constant (Pmode, stack_pointer_rtx,
10550 -adjust)));
10551 emit_stack_probe (stack_pointer_rtx);
10553 /* Adjust back to account for the additional first interval. */
10554 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10555 plus_constant (Pmode, stack_pointer_rtx,
10556 PROBE_INTERVAL + dope)));
10559 /* Otherwise, do the same as above, but in a loop. Note that we must be
10560 extra careful with variables wrapping around because we might be at
10561 the very top (or the very bottom) of the address space and we have
10562 to be able to handle this case properly; in particular, we use an
10563 equality test for the loop condition. */
10564 else
10566 HOST_WIDE_INT rounded_size;
10567 struct scratch_reg sr;
10569 get_scratch_register_on_entry (&sr);
10572 /* Step 1: round SIZE to the previous multiple of the interval. */
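/* E.g. with a 4096-byte interval, SIZE = 10000 yields ROUNDED_SIZE = 8192,
   the largest multiple of the interval not exceeding SIZE.  */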
10574 rounded_size = size & -PROBE_INTERVAL;
10577 /* Step 2: compute initial and final value of the loop counter. */
10579 /* SP = SP_0 + PROBE_INTERVAL. */
10580 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10581 plus_constant (Pmode, stack_pointer_rtx,
10582 - (PROBE_INTERVAL + dope))));
10584 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
10585 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
10586 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
10587 gen_rtx_PLUS (Pmode, sr.reg,
10588 stack_pointer_rtx)));
10591 /* Step 3: the loop
10593 while (SP != LAST_ADDR)
10595 SP = SP + PROBE_INTERVAL
10596 probe at SP
10599 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
10600 values of N from 1 until it is equal to ROUNDED_SIZE. */
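/* The loop body itself is emitted at output time by
   output_adjust_stack_and_probe below, matched through the corresponding
   insn pattern in i386.md.  */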
10602 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
10605 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
10606 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
10608 if (size != rounded_size)
10610 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10611 plus_constant (Pmode, stack_pointer_rtx,
10612 rounded_size - size)));
10613 emit_stack_probe (stack_pointer_rtx);
10616 /* Adjust back to account for the additional first interval. */
10617 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10618 plus_constant (Pmode, stack_pointer_rtx,
10619 PROBE_INTERVAL + dope)));
10621 release_scratch_register_on_entry (&sr);
10624 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
10626 /* Even if the stack pointer isn't the CFA register, we need to correctly
10627 describe the adjustments made to it, in particular differentiate the
10628 frame-related ones from the frame-unrelated ones. */
10629 if (size > 0)
10631 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
10632 XVECEXP (expr, 0, 0)
10633 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10634 plus_constant (Pmode, stack_pointer_rtx, -size));
10635 XVECEXP (expr, 0, 1)
10636 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10637 plus_constant (Pmode, stack_pointer_rtx,
10638 PROBE_INTERVAL + dope + size));
10639 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
10640 RTX_FRAME_RELATED_P (last) = 1;
10642 cfun->machine->fs.sp_offset += size;
10645 /* Make sure nothing is scheduled before we are done. */
10646 emit_insn (gen_blockage ());
10649 /* Adjust the stack pointer up to REG while probing it. */
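/* A rough sketch of the emitted loop (AT&T syntax, 32-bit register names,
   a 4096-byte probe interval and illustrative label names assumed), where
   <reg> holds LAST_ADDR:

	.LPSRL0:	cmpl	<reg>, %esp
			je	.LPSRE0
			subl	$4096, %esp
			orl	$0, (%esp)
			jmp	.LPSRL0
	.LPSRE0:
 */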
10651 const char *
10652 output_adjust_stack_and_probe (rtx reg)
10654 static int labelno = 0;
10655 char loop_lab[32], end_lab[32];
10656 rtx xops[2];
10658 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10659 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10661 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10663 /* Jump to END_LAB if SP == LAST_ADDR. */
10664 xops[0] = stack_pointer_rtx;
10665 xops[1] = reg;
10666 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10667 fputs ("\tje\t", asm_out_file);
10668 assemble_name_raw (asm_out_file, end_lab);
10669 fputc ('\n', asm_out_file);
10671 /* SP = SP + PROBE_INTERVAL. */
10672 xops[1] = GEN_INT (PROBE_INTERVAL);
10673 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10675 /* Probe at SP. */
10676 xops[1] = const0_rtx;
10677 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
10679 fprintf (asm_out_file, "\tjmp\t");
10680 assemble_name_raw (asm_out_file, loop_lab);
10681 fputc ('\n', asm_out_file);
10683 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10685 return "";
10688 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
10689 inclusive. These are offsets from the current stack pointer. */
10691 static void
10692 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
10694 /* See if we have a constant small number of probes to generate. If so,
10695 that's the easy case. The run-time loop is made up of 7 insns in the
10696 generic case while the compile-time loop is made up of n insns for n #
10697 of intervals. */
10698 if (size <= 7 * PROBE_INTERVAL)
10700 HOST_WIDE_INT i;
10702 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
10703 it exceeds SIZE. If only one probe is needed, this will not
10704 generate any code. Then probe at FIRST + SIZE. */
10705 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10706 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10707 -(first + i)));
10709 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10710 -(first + size)));
10713 /* Otherwise, do the same as above, but in a loop. Note that we must be
10714 extra careful with variables wrapping around because we might be at
10715 the very top (or the very bottom) of the address space and we have
10716 to be able to handle this case properly; in particular, we use an
10717 equality test for the loop condition. */
10718 else
10720 HOST_WIDE_INT rounded_size, last;
10721 struct scratch_reg sr;
10723 get_scratch_register_on_entry (&sr);
10726 /* Step 1: round SIZE to the previous multiple of the interval. */
10728 rounded_size = size & -PROBE_INTERVAL;
10731 /* Step 2: compute initial and final value of the loop counter. */
10733 /* TEST_OFFSET = FIRST. */
10734 emit_move_insn (sr.reg, GEN_INT (-first));
10736 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
10737 last = first + rounded_size;
10740 /* Step 3: the loop
10742 while (TEST_ADDR != LAST_ADDR)
10744 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
10745 probe at TEST_ADDR
10748 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
10749 until it is equal to ROUNDED_SIZE. */
10751 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
10754 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
10755 that SIZE is equal to ROUNDED_SIZE. */
10757 if (size != rounded_size)
10758 emit_stack_probe (plus_constant (Pmode,
10759 gen_rtx_PLUS (Pmode,
10760 stack_pointer_rtx,
10761 sr.reg),
10762 rounded_size - size));
10764 release_scratch_register_on_entry (&sr);
10767 /* Make sure nothing is scheduled before we are done. */
10768 emit_insn (gen_blockage ());
10771 /* Probe a range of stack addresses from REG to END, inclusive. These are
10772 offsets from the current stack pointer. */
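/* A rough sketch of the emitted loop (AT&T syntax, 32-bit register names,
   a 4096-byte probe interval and illustrative label names assumed).  As
   used by ix86_emit_probe_stack_range above, <reg> starts at -FIRST and
   <end> is -(FIRST + ROUNDED_SIZE), so the "sub" moves the probe address
   further below the stack pointer even though the logical TEST_ADDR
   increases:

	.LPSRL1:	cmpl	<end>, <reg>
			je	.LPSRE1
			subl	$4096, <reg>
			orl	$0, (%esp,<reg>)
			jmp	.LPSRL1
	.LPSRE1:
 */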
10774 const char *
10775 output_probe_stack_range (rtx reg, rtx end)
10777 static int labelno = 0;
10778 char loop_lab[32], end_lab[32];
10779 rtx xops[3];
10781 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10782 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10784 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10786 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
10787 xops[0] = reg;
10788 xops[1] = end;
10789 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10790 fputs ("\tje\t", asm_out_file);
10791 assemble_name_raw (asm_out_file, end_lab);
10792 fputc ('\n', asm_out_file);
10794 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
10795 xops[1] = GEN_INT (PROBE_INTERVAL);
10796 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10798 /* Probe at TEST_ADDR. */
10799 xops[0] = stack_pointer_rtx;
10800 xops[1] = reg;
10801 xops[2] = const0_rtx;
10802 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
10804 fprintf (asm_out_file, "\tjmp\t");
10805 assemble_name_raw (asm_out_file, loop_lab);
10806 fputc ('\n', asm_out_file);
10808 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10810 return "";
10813 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
10814 to be generated in correct form. */
10815 static void
10816 ix86_finalize_stack_realign_flags (void)
10818 /* Check whether stack realignment is really needed after reload, and
10819 store the result in cfun. */
10820 unsigned int incoming_stack_boundary
10821 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
10822 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
10823 unsigned int stack_realign = (incoming_stack_boundary
10824 < (crtl->is_leaf
10825 ? crtl->max_used_stack_slot_alignment
10826 : crtl->stack_alignment_needed));
10828 if (crtl->stack_realign_finalized)
10830 /* After stack_realign_needed is finalized, we can no longer
10831 change it. */
10832 gcc_assert (crtl->stack_realign_needed == stack_realign);
10833 return;
10836 /* If the only reason for frame_pointer_needed is that we conservatively
10837 assumed stack realignment might be needed, but in the end nothing that
10838 needed the stack alignment had been spilled, clear frame_pointer_needed
10839 and say we don't need stack realignment. */
10840 if (stack_realign
10841 && frame_pointer_needed
10842 && crtl->is_leaf
10843 && flag_omit_frame_pointer
10844 && crtl->sp_is_unchanging
10845 && !ix86_current_function_calls_tls_descriptor
10846 && !crtl->accesses_prior_frames
10847 && !cfun->calls_alloca
10848 && !crtl->calls_eh_return
10849 && !(flag_stack_check && STACK_CHECK_MOVING_SP)
10850 && !ix86_frame_pointer_required ()
10851 && get_frame_size () == 0
10852 && ix86_nsaved_sseregs () == 0
10853 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
10855 HARD_REG_SET set_up_by_prologue, prologue_used;
10856 basic_block bb;
10858 CLEAR_HARD_REG_SET (prologue_used);
10859 CLEAR_HARD_REG_SET (set_up_by_prologue);
10860 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
10861 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
10862 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
10863 HARD_FRAME_POINTER_REGNUM);
10864 FOR_EACH_BB_FN (bb, cfun)
10866 rtx insn;
10867 FOR_BB_INSNS (bb, insn)
10868 if (NONDEBUG_INSN_P (insn)
10869 && requires_stack_frame_p (insn, prologue_used,
10870 set_up_by_prologue))
10872 crtl->stack_realign_needed = stack_realign;
10873 crtl->stack_realign_finalized = true;
10874 return;
10878 /* If drap has been set, but it actually isn't live at the start
10879 of the function, there is no reason to set it up. */
10880 if (crtl->drap_reg)
10882 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
10883 if (! REGNO_REG_SET_P (DF_LR_IN (bb), REGNO (crtl->drap_reg)))
10885 crtl->drap_reg = NULL_RTX;
10886 crtl->need_drap = false;
10889 else
10890 cfun->machine->no_drap_save_restore = true;
10892 frame_pointer_needed = false;
10893 stack_realign = false;
10894 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
10895 crtl->stack_alignment_needed = incoming_stack_boundary;
10896 crtl->stack_alignment_estimated = incoming_stack_boundary;
10897 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
10898 crtl->preferred_stack_boundary = incoming_stack_boundary;
10899 df_finish_pass (true);
10900 df_scan_alloc (NULL);
10901 df_scan_blocks ();
10902 df_compute_regs_ever_live (true);
10903 df_analyze ();
10906 crtl->stack_realign_needed = stack_realign;
10907 crtl->stack_realign_finalized = true;
10910 /* Expand the prologue into a bunch of separate insns. */
10912 void
10913 ix86_expand_prologue (void)
10915 struct machine_function *m = cfun->machine;
10916 rtx insn, t;
10917 bool pic_reg_used;
10918 struct ix86_frame frame;
10919 HOST_WIDE_INT allocate;
10920 bool int_registers_saved;
10921 bool sse_registers_saved;
10923 ix86_finalize_stack_realign_flags ();
10925 /* DRAP should not coexist with stack_realign_fp */
10926 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
10928 memset (&m->fs, 0, sizeof (m->fs));
10930 /* Initialize CFA state for before the prologue. */
10931 m->fs.cfa_reg = stack_pointer_rtx;
10932 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
10934 /* Track SP offset to the CFA. We continue tracking this after we've
10935 swapped the CFA register away from SP. In the case of re-alignment
10936 this is fudged; we're interested in offsets within the local frame. */
10937 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10938 m->fs.sp_valid = true;
10940 ix86_compute_frame_layout (&frame);
10942 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
10944 /* We should have already generated an error for any use of
10945 ms_hook on a nested function. */
10946 gcc_checking_assert (!ix86_static_chain_on_stack);
10948 /* Check if profiling is active and we shall use the profiling-before-prologue
10949 variant. If so, sorry. */
10950 if (crtl->profile && flag_fentry != 0)
10951 sorry ("ms_hook_prologue attribute isn%'t compatible "
10952 "with -mfentry for 32-bit");
10954 /* In ix86_asm_output_function_label we emitted:
10955 8b ff movl.s %edi,%edi
10956 55 push %ebp
10957 8b ec movl.s %esp,%ebp
10959 This matches the hookable function prologue in Win32 API
10960 functions in Microsoft Windows XP Service Pack 2 and newer.
10961 Wine uses this to enable Windows apps to hook the Win32 API
10962 functions provided by Wine.
10964 What that means is that we've already set up the frame pointer. */
10966 if (frame_pointer_needed
10967 && !(crtl->drap_reg && crtl->stack_realign_needed))
10969 rtx push, mov;
10971 /* We've decided to use the frame pointer already set up.
10972 Describe this to the unwinder by pretending that both
10973 push and mov insns happen right here.
10975 Putting the unwind info here at the end of the ms_hook
10976 is done so that we can make absolutely certain we get
10977 the required byte sequence at the start of the function,
10978 rather than relying on an assembler that can produce
10979 the exact encoding required.
10981 However it does mean (in the unpatched case) that we have
10982 a 1 insn window where the asynchronous unwind info is
10983 incorrect. However, if we placed the unwind info at
10984 its correct location we would have incorrect unwind info
10985 in the patched case. Which is probably all moot since
10986 I don't expect Wine generates dwarf2 unwind info for the
10987 system libraries that use this feature. */
10989 insn = emit_insn (gen_blockage ());
10991 push = gen_push (hard_frame_pointer_rtx);
10992 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
10993 stack_pointer_rtx);
10994 RTX_FRAME_RELATED_P (push) = 1;
10995 RTX_FRAME_RELATED_P (mov) = 1;
10997 RTX_FRAME_RELATED_P (insn) = 1;
10998 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10999 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
11001 /* Note that gen_push incremented m->fs.cfa_offset, even
11002 though we didn't emit the push insn here. */
11003 m->fs.cfa_reg = hard_frame_pointer_rtx;
11004 m->fs.fp_offset = m->fs.cfa_offset;
11005 m->fs.fp_valid = true;
11007 else
11009 /* The frame pointer is not needed so pop %ebp again.
11010 This leaves us with a pristine state. */
11011 emit_insn (gen_pop (hard_frame_pointer_rtx));
11015 /* The first insn of a function that accepts its static chain on the
11016 stack is to push the register that would be filled in by a direct
11017 call. This insn will be skipped by the trampoline. */
11018 else if (ix86_static_chain_on_stack)
11020 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
11021 emit_insn (gen_blockage ());
11023 /* We don't want to interpret this push insn as a register save,
11024 only as a stack adjustment. The real copy of the register as
11025 a save will be done later, if needed. */
11026 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
11027 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
11028 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
11029 RTX_FRAME_RELATED_P (insn) = 1;
11032 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
11033 DRAP is needed and stack realignment is really needed after reload. */
11034 if (stack_realign_drap)
11036 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
11038 /* Only need to push parameter pointer reg if it is caller saved. */
11039 if (!call_used_regs[REGNO (crtl->drap_reg)])
11041 /* Push arg pointer reg */
11042 insn = emit_insn (gen_push (crtl->drap_reg));
11043 RTX_FRAME_RELATED_P (insn) = 1;
11046 /* Grab the argument pointer. */
11047 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
11048 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
11049 RTX_FRAME_RELATED_P (insn) = 1;
11050 m->fs.cfa_reg = crtl->drap_reg;
11051 m->fs.cfa_offset = 0;
11053 /* Align the stack. */
11054 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
11055 stack_pointer_rtx,
11056 GEN_INT (-align_bytes)));
11057 RTX_FRAME_RELATED_P (insn) = 1;
11059 /* Replicate the return address on the stack so that the return
11060 address can be reached via the (argp - 1) slot. This is needed
11061 to implement macro RETURN_ADDR_RTX and intrinsic function
11062 expand_builtin_return_addr etc. */
11063 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
11064 t = gen_frame_mem (word_mode, t);
11065 insn = emit_insn (gen_push (t));
11066 RTX_FRAME_RELATED_P (insn) = 1;
11068 /* For the purposes of frame and register save area addressing,
11069 we've started over with a new frame. */
11070 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
11071 m->fs.realigned = true;
11074 int_registers_saved = (frame.nregs == 0);
11075 sse_registers_saved = (frame.nsseregs == 0);
11077 if (frame_pointer_needed && !m->fs.fp_valid)
11079 /* Note: AT&T enter does NOT have reversed args. Enter is probably
11080 slower on all targets. Also sdb doesn't like it. */
11081 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
11082 RTX_FRAME_RELATED_P (insn) = 1;
11084 /* Push registers now, before setting the frame pointer
11085 on SEH target. */
11086 if (!int_registers_saved
11087 && TARGET_SEH
11088 && !frame.save_regs_using_mov)
11090 ix86_emit_save_regs ();
11091 int_registers_saved = true;
11092 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
11095 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
11097 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
11098 RTX_FRAME_RELATED_P (insn) = 1;
11100 if (m->fs.cfa_reg == stack_pointer_rtx)
11101 m->fs.cfa_reg = hard_frame_pointer_rtx;
11102 m->fs.fp_offset = m->fs.sp_offset;
11103 m->fs.fp_valid = true;
11107 if (!int_registers_saved)
11109 /* If saving registers via PUSH, do so now. */
11110 if (!frame.save_regs_using_mov)
11112 ix86_emit_save_regs ();
11113 int_registers_saved = true;
11114 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
11117 /* When using red zone we may start register saving before allocating
11118 the stack frame, saving one cycle of the prologue. However, avoid
11119 doing this if we have to probe the stack; at least on x86_64 the
11120 stack probe can turn into a call that clobbers a red zone location. */
11121 else if (ix86_using_red_zone ()
11122 && (! TARGET_STACK_PROBE
11123 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
11125 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11126 int_registers_saved = true;
11130 if (stack_realign_fp)
11132 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
11133 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
11135 /* The computation of the size of the re-aligned stack frame means
11136 that we must allocate the size of the register save area before
11137 performing the actual alignment. Otherwise we cannot guarantee
11138 that there's enough storage above the realignment point. */
11139 if (m->fs.sp_offset != frame.sse_reg_save_offset)
11140 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11141 GEN_INT (m->fs.sp_offset
11142 - frame.sse_reg_save_offset),
11143 -1, false);
11145 /* Align the stack. */
11146 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
11147 stack_pointer_rtx,
11148 GEN_INT (-align_bytes)));
11150 /* For the purposes of register save area addressing, the stack
11151 pointer is no longer valid. As for the value of sp_offset,
11152 see ix86_compute_frame_layout, which we need to match in order
11153 to pass verification of stack_pointer_offset at the end. */
11154 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
11155 m->fs.sp_valid = false;
11158 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
11160 if (flag_stack_usage_info)
11162 /* We start to count from ARG_POINTER. */
11163 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
11165 /* If it was realigned, take into account the fake frame. */
11166 if (stack_realign_drap)
11168 if (ix86_static_chain_on_stack)
11169 stack_size += UNITS_PER_WORD;
11171 if (!call_used_regs[REGNO (crtl->drap_reg)])
11172 stack_size += UNITS_PER_WORD;
11174 /* This over-estimates by 1 minimal-stack-alignment-unit but
11175 mitigates that by counting in the new return address slot. */
11176 current_function_dynamic_stack_size
11177 += crtl->stack_alignment_needed / BITS_PER_UNIT;
11180 current_function_static_stack_size = stack_size;
11183 /* On SEH target with very large frame size, allocate an area to save
11184 SSE registers (as the very large allocation won't be described). */
11185 if (TARGET_SEH
11186 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
11187 && !sse_registers_saved)
11189 HOST_WIDE_INT sse_size =
11190 frame.sse_reg_save_offset - frame.reg_save_offset;
11192 gcc_assert (int_registers_saved);
11194 /* No need to do stack checking as the area will be immediately
11195 written. */
11196 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11197 GEN_INT (-sse_size), -1,
11198 m->fs.cfa_reg == stack_pointer_rtx);
11199 allocate -= sse_size;
11200 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11201 sse_registers_saved = true;
11204 /* The stack has already been decremented by the instruction calling us
11205 so probe if the size is non-negative to preserve the protection area. */
11206 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
11208 /* We expect the registers to be saved when probes are used. */
11209 gcc_assert (int_registers_saved);
11211 if (STACK_CHECK_MOVING_SP)
11213 if (!(crtl->is_leaf && !cfun->calls_alloca
11214 && allocate <= PROBE_INTERVAL))
11216 ix86_adjust_stack_and_probe (allocate);
11217 allocate = 0;
11220 else
11222 HOST_WIDE_INT size = allocate;
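/* Clamp the probed size so that the probe offsets still fit in a
   sign-extended 32-bit displacement.  */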
11224 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
11225 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
11227 if (TARGET_STACK_PROBE)
11229 if (crtl->is_leaf && !cfun->calls_alloca)
11231 if (size > PROBE_INTERVAL)
11232 ix86_emit_probe_stack_range (0, size);
11234 else
11235 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
11237 else
11239 if (crtl->is_leaf && !cfun->calls_alloca)
11241 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
11242 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT,
11243 size - STACK_CHECK_PROTECT);
11245 else
11246 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
11251 if (allocate == 0)
11253 else if (!ix86_target_stack_probe ()
11254 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
11256 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11257 GEN_INT (-allocate), -1,
11258 m->fs.cfa_reg == stack_pointer_rtx);
11260 else
11262 rtx eax = gen_rtx_REG (Pmode, AX_REG);
11263 rtx r10 = NULL;
11264 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
11265 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
11266 bool eax_live = ix86_eax_live_at_start_p ();
11267 bool r10_live = false;
11269 if (TARGET_64BIT)
11270 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
11272 if (eax_live)
11274 insn = emit_insn (gen_push (eax));
11275 allocate -= UNITS_PER_WORD;
11276 /* Note that SEH directives need to continue tracking the stack
11277 pointer even after the frame pointer has been set up. */
11278 if (sp_is_cfa_reg || TARGET_SEH)
11280 if (sp_is_cfa_reg)
11281 m->fs.cfa_offset += UNITS_PER_WORD;
11282 RTX_FRAME_RELATED_P (insn) = 1;
11286 if (r10_live)
11288 r10 = gen_rtx_REG (Pmode, R10_REG);
11289 insn = emit_insn (gen_push (r10));
11290 allocate -= UNITS_PER_WORD;
11291 if (sp_is_cfa_reg || TARGET_SEH)
11293 if (sp_is_cfa_reg)
11294 m->fs.cfa_offset += UNITS_PER_WORD;
11295 RTX_FRAME_RELATED_P (insn) = 1;
11299 emit_move_insn (eax, GEN_INT (allocate));
11300 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
11302 /* Use the fact that AX still contains ALLOCATE. */
11303 adjust_stack_insn = (Pmode == DImode
11304 ? gen_pro_epilogue_adjust_stack_di_sub
11305 : gen_pro_epilogue_adjust_stack_si_sub);
11307 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
11308 stack_pointer_rtx, eax));
11310 if (sp_is_cfa_reg || TARGET_SEH)
11312 if (sp_is_cfa_reg)
11313 m->fs.cfa_offset += allocate;
11314 RTX_FRAME_RELATED_P (insn) = 1;
11315 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11316 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11317 plus_constant (Pmode, stack_pointer_rtx,
11318 -allocate)));
11320 m->fs.sp_offset += allocate;
11322 /* Use stack_pointer_rtx for relative addressing so that code
11323 works for realigned stack, too. */
11324 if (r10_live && eax_live)
11326 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11327 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
11328 gen_frame_mem (word_mode, t));
11329 t = plus_constant (Pmode, t, UNITS_PER_WORD);
11330 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
11331 gen_frame_mem (word_mode, t));
11333 else if (eax_live || r10_live)
11335 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11336 emit_move_insn (gen_rtx_REG (word_mode,
11337 (eax_live ? AX_REG : R10_REG)),
11338 gen_frame_mem (word_mode, t));
11341 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
11343 /* If we haven't already set up the frame pointer, do so now. */
11344 if (frame_pointer_needed && !m->fs.fp_valid)
11346 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
11347 GEN_INT (frame.stack_pointer_offset
11348 - frame.hard_frame_pointer_offset));
11349 insn = emit_insn (insn);
11350 RTX_FRAME_RELATED_P (insn) = 1;
11351 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
11353 if (m->fs.cfa_reg == stack_pointer_rtx)
11354 m->fs.cfa_reg = hard_frame_pointer_rtx;
11355 m->fs.fp_offset = frame.hard_frame_pointer_offset;
11356 m->fs.fp_valid = true;
11359 if (!int_registers_saved)
11360 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11361 if (!sse_registers_saved)
11362 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11364 pic_reg_used = false;
11365 /* We don't use pic-register for pe-coff target. */
11366 if (pic_offset_table_rtx
11367 && !TARGET_PECOFF
11368 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
11369 || crtl->profile))
11371 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
11373 if (alt_pic_reg_used != INVALID_REGNUM)
11374 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
11376 pic_reg_used = true;
11379 if (pic_reg_used)
11381 if (TARGET_64BIT)
11383 if (ix86_cmodel == CM_LARGE_PIC)
11385 rtx label, tmp_reg;
11387 gcc_assert (Pmode == DImode);
11388 label = gen_label_rtx ();
11389 emit_label (label);
11390 LABEL_PRESERVE_P (label) = 1;
11391 tmp_reg = gen_rtx_REG (Pmode, R11_REG);
11392 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
11393 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
11394 label));
11395 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
11396 insn = emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
11397 pic_offset_table_rtx, tmp_reg));
11399 else
11400 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
11402 else
11404 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
11405 RTX_FRAME_RELATED_P (insn) = 1;
11406 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
11410 /* In the pic_reg_used case, make sure that the got load isn't deleted
11411 when mcount needs it. Blockage to avoid call movement across mcount
11412 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
11413 note. */
11414 if (crtl->profile && !flag_fentry && pic_reg_used)
11415 emit_insn (gen_prologue_use (pic_offset_table_rtx));
11417 if (crtl->drap_reg && !crtl->stack_realign_needed)
11419 /* vDRAP is set up, but after reload it turns out stack realignment
11420 isn't necessary; here we emit the prologue to set up DRAP
11421 without the stack realignment adjustment. */
11422 t = choose_baseaddr (0);
11423 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
11426 /* Prevent instructions from being scheduled into register save push
11427 sequence when access to the redzone area is done through frame pointer.
11428 The offset between the frame pointer and the stack pointer is calculated
11429 relative to the value of the stack pointer at the end of the function
11430 prologue, and moving instructions that access redzone area via frame
11431 pointer inside push sequence violates this assumption. */
11432 if (frame_pointer_needed && frame.red_zone_size)
11433 emit_insn (gen_memory_blockage ());
11435 /* Emit cld instruction if stringops are used in the function. */
11436 if (TARGET_CLD && ix86_current_function_needs_cld)
11437 emit_insn (gen_cld ());
11439 /* SEH requires that the prologue end within 256 bytes of the start of
11440 the function. Prevent instruction schedules that would extend that.
11441 Further, prevent alloca modifications to the stack pointer from being
11442 combined with prologue modifications. */
11443 if (TARGET_SEH)
11444 emit_insn (gen_prologue_use (stack_pointer_rtx));
11447 /* Emit code to restore REG using a POP insn. */
11449 static void
11450 ix86_emit_restore_reg_using_pop (rtx reg)
11452 struct machine_function *m = cfun->machine;
11453 rtx insn = emit_insn (gen_pop (reg));
11455 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
11456 m->fs.sp_offset -= UNITS_PER_WORD;
11458 if (m->fs.cfa_reg == crtl->drap_reg
11459 && REGNO (reg) == REGNO (crtl->drap_reg))
11461 /* Previously we'd represented the CFA as an expression
11462 like *(%ebp - 8). We've just popped that value from
11463 the stack, which means we need to reset the CFA to
11464 the drap register. This will remain until we restore
11465 the stack pointer. */
11466 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11467 RTX_FRAME_RELATED_P (insn) = 1;
11469 /* This means that the DRAP register is valid for addressing too. */
11470 m->fs.drap_valid = true;
11471 return;
11474 if (m->fs.cfa_reg == stack_pointer_rtx)
11476 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11477 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
11478 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11479 RTX_FRAME_RELATED_P (insn) = 1;
11481 m->fs.cfa_offset -= UNITS_PER_WORD;
11484 /* When the frame pointer is the CFA, and we pop it, we are
11485 swapping back to the stack pointer as the CFA. This happens
11486 for stack frames that don't allocate other data, so we assume
11487 the stack pointer is now pointing at the return address, i.e.
11488 the function entry state, which makes the offset be 1 word. */
11489 if (reg == hard_frame_pointer_rtx)
11491 m->fs.fp_valid = false;
11492 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11494 m->fs.cfa_reg = stack_pointer_rtx;
11495 m->fs.cfa_offset -= UNITS_PER_WORD;
11497 add_reg_note (insn, REG_CFA_DEF_CFA,
11498 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11499 GEN_INT (m->fs.cfa_offset)));
11500 RTX_FRAME_RELATED_P (insn) = 1;
11505 /* Emit code to restore saved registers using POP insns. */
11507 static void
11508 ix86_emit_restore_regs_using_pop (void)
11510 unsigned int regno;
11512 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11513 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
11514 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
11517 /* Emit code and notes for the LEAVE instruction. */
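/* LEAVE is equivalent to "mov %ebp, %esp; pop %ebp" (with %rbp/%rsp in
   64-bit mode): it restores the stack pointer from the frame pointer and
   pops the saved frame pointer in one instruction, hence the state
   updates below.  */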
11519 static void
11520 ix86_emit_leave (void)
11522 struct machine_function *m = cfun->machine;
11523 rtx insn = emit_insn (ix86_gen_leave ());
11525 ix86_add_queued_cfa_restore_notes (insn);
11527 gcc_assert (m->fs.fp_valid);
11528 m->fs.sp_valid = true;
11529 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
11530 m->fs.fp_valid = false;
11532 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11534 m->fs.cfa_reg = stack_pointer_rtx;
11535 m->fs.cfa_offset = m->fs.sp_offset;
11537 add_reg_note (insn, REG_CFA_DEF_CFA,
11538 plus_constant (Pmode, stack_pointer_rtx,
11539 m->fs.sp_offset));
11540 RTX_FRAME_RELATED_P (insn) = 1;
11542 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
11543 m->fs.fp_offset);
11546 /* Emit code to restore saved registers using MOV insns.
11547 First register is restored from CFA - CFA_OFFSET. */
11548 static void
11549 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
11550 bool maybe_eh_return)
11552 struct machine_function *m = cfun->machine;
11553 unsigned int regno;
11555 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11556 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11558 rtx reg = gen_rtx_REG (word_mode, regno);
11559 rtx insn, mem;
11561 mem = choose_baseaddr (cfa_offset);
11562 mem = gen_frame_mem (word_mode, mem);
11563 insn = emit_move_insn (reg, mem);
11565 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
11567 /* Previously we'd represented the CFA as an expression
11568 like *(%ebp - 8). We've just popped that value from
11569 the stack, which means we need to reset the CFA to
11570 the drap register. This will remain until we restore
11571 the stack pointer. */
11572 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11573 RTX_FRAME_RELATED_P (insn) = 1;
11575 /* This means that the DRAP register is valid for addressing. */
11576 m->fs.drap_valid = true;
11578 else
11579 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11581 cfa_offset -= UNITS_PER_WORD;
11585 /* Emit code to restore saved registers using MOV insns.
11586 First register is restored from CFA - CFA_OFFSET. */
11587 static void
11588 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
11589 bool maybe_eh_return)
11591 unsigned int regno;
11593 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11594 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11596 rtx reg = gen_rtx_REG (V4SFmode, regno);
11597 rtx mem;
11599 mem = choose_baseaddr (cfa_offset);
11600 mem = gen_rtx_MEM (V4SFmode, mem);
11601 set_mem_align (mem, 128);
11602 emit_move_insn (reg, mem);
11604 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11606 cfa_offset -= 16;
11610 /* Restore function stack, frame, and registers. */
11612 void
11613 ix86_expand_epilogue (int style)
11615 struct machine_function *m = cfun->machine;
11616 struct machine_frame_state frame_state_save = m->fs;
11617 struct ix86_frame frame;
11618 bool restore_regs_via_mov;
11619 bool using_drap;
11621 ix86_finalize_stack_realign_flags ();
11622 ix86_compute_frame_layout (&frame);
11624 m->fs.sp_valid = (!frame_pointer_needed
11625 || (crtl->sp_is_unchanging
11626 && !stack_realign_fp));
11627 gcc_assert (!m->fs.sp_valid
11628 || m->fs.sp_offset == frame.stack_pointer_offset);
11630 /* The FP must be valid if the frame pointer is present. */
11631 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
11632 gcc_assert (!m->fs.fp_valid
11633 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
11635 /* We must have *some* valid pointer to the stack frame. */
11636 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
11638 /* The DRAP is never valid at this point. */
11639 gcc_assert (!m->fs.drap_valid);
11641 /* See the comment about red zone and frame
11642 pointer usage in ix86_expand_prologue. */
11643 if (frame_pointer_needed && frame.red_zone_size)
11644 emit_insn (gen_memory_blockage ());
11646 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
11647 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
11649 /* Determine the CFA offset of the end of the red-zone. */
11650 m->fs.red_zone_offset = 0;
11651 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
11653 /* The red-zone begins below the return address. */
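/* With the 64-bit SysV ABI's RED_ZONE_SIZE of 128 bytes and 8-byte words,
   this offset is 136 bytes.  */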
11654 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
11656 /* When the register save area is in the aligned portion of
11657 the stack, determine the maximum runtime displacement that
11658 matches up with the aligned frame. */
11659 if (stack_realign_drap)
11660 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
11661 + UNITS_PER_WORD);
11664 /* Special care must be taken for the normal return case of a function
11665 using eh_return: the eax and edx registers are marked as saved, but
11666 not restored along this path. Adjust the save location to match. */
11667 if (crtl->calls_eh_return && style != 2)
11668 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
11670 /* EH_RETURN requires the use of moves to function properly. */
11671 if (crtl->calls_eh_return)
11672 restore_regs_via_mov = true;
11673 /* SEH requires the use of pops to identify the epilogue. */
11674 else if (TARGET_SEH)
11675 restore_regs_via_mov = false;
11676 /* If we're only restoring one register and sp is not valid, then
11677 use a move instruction to restore the register, since it's
11678 less work than reloading sp and popping the register. */
11679 else if (!m->fs.sp_valid && frame.nregs <= 1)
11680 restore_regs_via_mov = true;
11681 else if (TARGET_EPILOGUE_USING_MOVE
11682 && cfun->machine->use_fast_prologue_epilogue
11683 && (frame.nregs > 1
11684 || m->fs.sp_offset != frame.reg_save_offset))
11685 restore_regs_via_mov = true;
11686 else if (frame_pointer_needed
11687 && !frame.nregs
11688 && m->fs.sp_offset != frame.reg_save_offset)
11689 restore_regs_via_mov = true;
11690 else if (frame_pointer_needed
11691 && TARGET_USE_LEAVE
11692 && cfun->machine->use_fast_prologue_epilogue
11693 && frame.nregs == 1)
11694 restore_regs_via_mov = true;
11695 else
11696 restore_regs_via_mov = false;
11698 if (restore_regs_via_mov || frame.nsseregs)
11700 /* Ensure that the entire register save area is addressable via
11701 the stack pointer, if we will restore via sp. */
11702 if (TARGET_64BIT
11703 && m->fs.sp_offset > 0x7fffffff
11704 && !(m->fs.fp_valid || m->fs.drap_valid)
11705 && (frame.nsseregs + frame.nregs) != 0)
11707 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11708 GEN_INT (m->fs.sp_offset
11709 - frame.sse_reg_save_offset),
11710 style,
11711 m->fs.cfa_reg == stack_pointer_rtx);
11715 /* If there are any SSE registers to restore, then we have to do it
11716 via moves, since there's obviously no pop for SSE regs. */
11717 if (frame.nsseregs)
11718 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
11719 style == 2);
11721 if (restore_regs_via_mov)
11723 rtx t;
11725 if (frame.nregs)
11726 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
11728 /* eh_return epilogues need %ecx added to the stack pointer. */
11729 if (style == 2)
11731 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
11733 /* Stack align doesn't work with eh_return. */
11734 gcc_assert (!stack_realign_drap);
11735 /* Neither do regparm nested functions. */
11736 gcc_assert (!ix86_static_chain_on_stack);
11738 if (frame_pointer_needed)
11740 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
11741 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
11742 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
11744 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
11745 insn = emit_move_insn (hard_frame_pointer_rtx, t);
11747 /* Note that we use SA as a temporary CFA, as the return
11748 address is at the proper place relative to it. We
11749 pretend this happens at the FP restore insn because
11750 prior to this insn the FP would be stored at the wrong
11751 offset relative to SA, and after this insn we have no
11752 other reasonable register to use for the CFA. We don't
11753 bother resetting the CFA to the SP for the duration of
11754 the return insn. */
11755 add_reg_note (insn, REG_CFA_DEF_CFA,
11756 plus_constant (Pmode, sa, UNITS_PER_WORD));
11757 ix86_add_queued_cfa_restore_notes (insn);
11758 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
11759 RTX_FRAME_RELATED_P (insn) = 1;
11761 m->fs.cfa_reg = sa;
11762 m->fs.cfa_offset = UNITS_PER_WORD;
11763 m->fs.fp_valid = false;
11765 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
11766 const0_rtx, style, false);
11768 else
11770 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
11771 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
11772 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
11773 ix86_add_queued_cfa_restore_notes (insn);
11775 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
11776 if (m->fs.cfa_offset != UNITS_PER_WORD)
11778 m->fs.cfa_offset = UNITS_PER_WORD;
11779 add_reg_note (insn, REG_CFA_DEF_CFA,
11780 plus_constant (Pmode, stack_pointer_rtx,
11781 UNITS_PER_WORD));
11782 RTX_FRAME_RELATED_P (insn) = 1;
11785 m->fs.sp_offset = UNITS_PER_WORD;
11786 m->fs.sp_valid = true;
11789 else
11791 /* SEH requires that the function end with (1) a stack adjustment
11792 if necessary, (2) a sequence of pops, and (3) a return or
11793 jump instruction. Prevent insns from the function body from
11794 being scheduled into this sequence. */
11795 if (TARGET_SEH)
11797 /* Prevent a catch region from being adjacent to the standard
11798 epilogue sequence. Unfortunately, neither crtl->uses_eh_lsda nor
11799 several other flags that would be interesting to test are
11800 set up yet. */
11801 if (flag_non_call_exceptions)
11802 emit_insn (gen_nops (const1_rtx));
11803 else
11804 emit_insn (gen_blockage ());
11807 /* First step is to deallocate the stack frame so that we can
11808 pop the registers. Also do it on SEH target for very large
11809 frame as the emitted instructions aren't allowed by the ABI in
11810 epilogues. */
11811 if (!m->fs.sp_valid
11812 || (TARGET_SEH
11813 && (m->fs.sp_offset - frame.reg_save_offset
11814 >= SEH_MAX_FRAME_SIZE)))
11816 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
11817 GEN_INT (m->fs.fp_offset
11818 - frame.reg_save_offset),
11819 style, false);
11821 else if (m->fs.sp_offset != frame.reg_save_offset)
11823 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11824 GEN_INT (m->fs.sp_offset
11825 - frame.reg_save_offset),
11826 style,
11827 m->fs.cfa_reg == stack_pointer_rtx);
11830 ix86_emit_restore_regs_using_pop ();
11833 /* If we used a frame pointer and haven't already got rid of it,
11834 then do so now. */
11835 if (m->fs.fp_valid)
11837 /* If the stack pointer is valid and pointing at the frame
11838 pointer store address, then we only need a pop. */
11839 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
11840 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11841 /* Leave results in shorter dependency chains on CPUs that are
11842 able to grok it fast. */
11843 else if (TARGET_USE_LEAVE
11844 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
11845 || !cfun->machine->use_fast_prologue_epilogue)
11846 ix86_emit_leave ();
11847 else
11849 pro_epilogue_adjust_stack (stack_pointer_rtx,
11850 hard_frame_pointer_rtx,
11851 const0_rtx, style, !using_drap);
11852 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11856 if (using_drap)
11858 int param_ptr_offset = UNITS_PER_WORD;
11859 rtx insn;
11861 gcc_assert (stack_realign_drap);
11863 if (ix86_static_chain_on_stack)
11864 param_ptr_offset += UNITS_PER_WORD;
11865 if (!call_used_regs[REGNO (crtl->drap_reg)])
11866 param_ptr_offset += UNITS_PER_WORD;
11868 insn = emit_insn (gen_rtx_SET
11869 (VOIDmode, stack_pointer_rtx,
11870 gen_rtx_PLUS (Pmode,
11871 crtl->drap_reg,
11872 GEN_INT (-param_ptr_offset))));
11873 m->fs.cfa_reg = stack_pointer_rtx;
11874 m->fs.cfa_offset = param_ptr_offset;
11875 m->fs.sp_offset = param_ptr_offset;
11876 m->fs.realigned = false;
11878 add_reg_note (insn, REG_CFA_DEF_CFA,
11879 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11880 GEN_INT (param_ptr_offset)));
11881 RTX_FRAME_RELATED_P (insn) = 1;
11883 if (!call_used_regs[REGNO (crtl->drap_reg)])
11884 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
11887 /* At this point the stack pointer must be valid, and we must have
11888 restored all of the registers. We may not have deallocated the
11889 entire stack frame. We've delayed this until now because it may
11890 be possible to merge the local stack deallocation with the
11891 deallocation forced by ix86_static_chain_on_stack. */
11892 gcc_assert (m->fs.sp_valid);
11893 gcc_assert (!m->fs.fp_valid);
11894 gcc_assert (!m->fs.realigned);
11895 if (m->fs.sp_offset != UNITS_PER_WORD)
11897 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11898 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
11899 style, true);
11901 else
11902 ix86_add_queued_cfa_restore_notes (get_last_insn ());
11904 /* Sibcall epilogues don't want a return instruction. */
11905 if (style == 0)
11907 m->fs = frame_state_save;
11908 return;
11911 if (crtl->args.pops_args && crtl->args.size)
11913 rtx popc = GEN_INT (crtl->args.pops_args);
11915 /* i386 can only pop 64K bytes. If asked to pop more, pop the return
11916 address, do an explicit add, and jump indirectly to the caller. */
11918 if (crtl->args.pops_args >= 65536)
11920 rtx ecx = gen_rtx_REG (SImode, CX_REG);
11921 rtx insn;
11923 /* There is no "pascal" calling convention in any 64bit ABI. */
11924 gcc_assert (!TARGET_64BIT);
11926 insn = emit_insn (gen_pop (ecx));
11927 m->fs.cfa_offset -= UNITS_PER_WORD;
11928 m->fs.sp_offset -= UNITS_PER_WORD;
11930 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11931 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
11932 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11933 add_reg_note (insn, REG_CFA_REGISTER,
11934 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
11935 RTX_FRAME_RELATED_P (insn) = 1;
11937 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11938 popc, -1, true);
11939 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
11941 else
11942 emit_jump_insn (gen_simple_return_pop_internal (popc));
11944 else
11945 emit_jump_insn (gen_simple_return_internal ());
11947 /* Restore the state back to the state from the prologue,
11948 so that it's correct for the next epilogue. */
11949 m->fs = frame_state_save;
11952 /* Reset from the function's potential modifications. */
11954 static void
11955 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
11956 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
11958 if (pic_offset_table_rtx)
11959 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
11960 #if TARGET_MACHO
11961 /* Mach-O doesn't support labels at the end of objects, so if
11962 it looks like we might want one, insert a NOP. */
11964 rtx insn = get_last_insn ();
11965 rtx deleted_debug_label = NULL_RTX;
11966 while (insn
11967 && NOTE_P (insn)
11968 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
11970 /* Don't insert a nop just for NOTE_INSN_DELETED_DEBUG_LABEL
11971 notes; instead set their CODE_LABEL_NUMBER to -1,
11972 otherwise there would be code generation differences
11973 between -g and -g0. */
11974 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
11975 deleted_debug_label = insn;
11976 insn = PREV_INSN (insn);
11978 if (insn
11979 && (LABEL_P (insn)
11980 || (NOTE_P (insn)
11981 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
11982 fputs ("\tnop\n", file);
11983 else if (deleted_debug_label)
11984 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
11985 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
11986 CODE_LABEL_NUMBER (insn) = -1;
11988 #endif
11992 /* Return a scratch register to use in the split stack prologue. The
11993 split stack prologue is used for -fsplit-stack. It consists of the first
11994 instructions in the function, even before the regular prologue.
11995 The scratch register can be any caller-saved register which is not
11996 used for parameters or for the static chain. */
11998 static unsigned int
11999 split_stack_prologue_scratch_regno (void)
12001 if (TARGET_64BIT)
12002 return R11_REG;
12003 else
12005 bool is_fastcall, is_thiscall;
12006 int regparm;
12008 is_fastcall = (lookup_attribute ("fastcall",
12009 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
12010 != NULL);
12011 is_thiscall = (lookup_attribute ("thiscall",
12012 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
12013 != NULL);
12014 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
12016 if (is_fastcall)
12018 if (DECL_STATIC_CHAIN (cfun->decl))
12020 sorry ("-fsplit-stack does not support fastcall with "
12021 "nested function");
12022 return INVALID_REGNUM;
12024 return AX_REG;
12026 else if (is_thiscall)
12028 if (!DECL_STATIC_CHAIN (cfun->decl))
12029 return DX_REG;
12030 return AX_REG;
12032 else if (regparm < 3)
12034 if (!DECL_STATIC_CHAIN (cfun->decl))
12035 return CX_REG;
12036 else
12038 if (regparm >= 2)
12040 sorry ("-fsplit-stack does not support 2 register "
12041 "parameters for a nested function");
12042 return INVALID_REGNUM;
12044 return DX_REG;
12047 else
12049 /* FIXME: We could make this work by pushing a register
12050 around the addition and comparison. */
12051 sorry ("-fsplit-stack does not support 3 register parameters");
12052 return INVALID_REGNUM;
12057 /* A SYMBOL_REF for the function which allocates new stack space for
12058 -fsplit-stack. */
12060 static GTY(()) rtx split_stack_fn;
12062 /* A SYMBOL_REF for the more stack function when using the large
12063 model. */
12065 static GTY(()) rtx split_stack_fn_large;
12067 /* Handle -fsplit-stack. These are the first instructions in the
12068 function, even before the regular prologue. */
12070 void
12071 ix86_expand_split_stack_prologue (void)
12073 struct ix86_frame frame;
12074 HOST_WIDE_INT allocate;
12075 unsigned HOST_WIDE_INT args_size;
12076 rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
12077 rtx scratch_reg = NULL_RTX;
12078 rtx varargs_label = NULL_RTX;
12079 rtx fn;
12081 gcc_assert (flag_split_stack && reload_completed);
12083 ix86_finalize_stack_realign_flags ();
12084 ix86_compute_frame_layout (&frame);
12085 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
12087 /* This is the label we will branch to if we have enough stack
12088 space. We expect the basic block reordering pass to reverse this
12089 branch if optimizing, so that we branch in the unlikely case. */
12090 label = gen_label_rtx ();
12092 /* We need to compare the stack pointer minus the frame size with
12093 the stack boundary in the TCB. The stack boundary always gives
12094 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
12095 can compare directly. Otherwise we need to do an addition. */
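/* A note, assuming the usual i386 operand-printing behaviour: the
   UNSPEC_STACK_CHECK address built below is emitted as a %fs-/%gs-relative
   memory operand that reads the split-stack guard word kept in the TCB.  */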
12097 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
12098 UNSPEC_STACK_CHECK);
12099 limit = gen_rtx_CONST (Pmode, limit);
12100 limit = gen_rtx_MEM (Pmode, limit);
12101 if (allocate < SPLIT_STACK_AVAILABLE)
12102 current = stack_pointer_rtx;
12103 else
12105 unsigned int scratch_regno;
12106 rtx offset;
12108 /* We need a scratch register to hold the stack pointer minus
12109 the required frame size. Since this is the very start of the
12110 function, the scratch register can be any caller-saved
12111 register which is not used for parameters. */
12112 offset = GEN_INT (- allocate);
12113 scratch_regno = split_stack_prologue_scratch_regno ();
12114 if (scratch_regno == INVALID_REGNUM)
12115 return;
12116 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12117 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
12119 /* We don't use ix86_gen_add3 in this case because it will
12120 want to split to lea, but when not optimizing the insn
12121 will not be split after this point. */
12122 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12123 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12124 offset)));
12126 else
12128 emit_move_insn (scratch_reg, offset);
12129 emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
12130 stack_pointer_rtx));
12132 current = scratch_reg;
12135 ix86_expand_branch (GEU, current, limit, label);
12136 jump_insn = get_last_insn ();
12137 JUMP_LABEL (jump_insn) = label;
12139 /* Mark the jump as very likely to be taken. */
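/* REG_BR_PROB_BASE is 10000, so this records a taken probability of 99%.  */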
12140 add_int_reg_note (jump_insn, REG_BR_PROB,
12141 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
12143 if (split_stack_fn == NULL_RTX)
12144 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
12145 fn = split_stack_fn;
12147 /* Get more stack space. We pass in the desired stack space and the
12148 size of the arguments to copy to the new stack. In 32-bit mode
12149 we push the parameters; __morestack will return on a new stack
12150 anyhow. In 64-bit mode we pass the parameters in r10 and
12151 r11. */
12152 allocate_rtx = GEN_INT (allocate);
12153 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
12154 call_fusage = NULL_RTX;
12155 if (TARGET_64BIT)
12157 rtx reg10, reg11;
12159 reg10 = gen_rtx_REG (Pmode, R10_REG);
12160 reg11 = gen_rtx_REG (Pmode, R11_REG);
12162 /* If this function uses a static chain, it will be in %r10.
12163 Preserve it across the call to __morestack. */
12164 if (DECL_STATIC_CHAIN (cfun->decl))
12166 rtx rax;
12168 rax = gen_rtx_REG (word_mode, AX_REG);
12169 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
12170 use_reg (&call_fusage, rax);
12173 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
12174 && !TARGET_PECOFF)
12176 HOST_WIDE_INT argval;
12178 gcc_assert (Pmode == DImode);
12179 /* When using the large model we need to load the address
12180 into a register, and we've run out of registers. So we
12181 switch to a different calling convention, and we call a
12182 different function: __morestack_large. We pass the
12183 argument size in the upper 32 bits of r10 and pass the
12184 frame size in the lower 32 bits. */
12185 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
12186 gcc_assert ((args_size & 0xffffffff) == args_size);
12188 if (split_stack_fn_large == NULL_RTX)
12189 split_stack_fn_large =
12190 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
12192 if (ix86_cmodel == CM_LARGE_PIC)
12194 rtx label, x;
12196 label = gen_label_rtx ();
12197 emit_label (label);
12198 LABEL_PRESERVE_P (label) = 1;
12199 emit_insn (gen_set_rip_rex64 (reg10, label));
12200 emit_insn (gen_set_got_offset_rex64 (reg11, label));
12201 emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
12202 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
12203 UNSPEC_GOT);
12204 x = gen_rtx_CONST (Pmode, x);
12205 emit_move_insn (reg11, x);
12206 x = gen_rtx_PLUS (Pmode, reg10, reg11);
12207 x = gen_const_mem (Pmode, x);
12208 emit_move_insn (reg11, x);
12210 else
12211 emit_move_insn (reg11, split_stack_fn_large);
12213 fn = reg11;
12215 argval = ((args_size << 16) << 16) + allocate;
12216 emit_move_insn (reg10, GEN_INT (argval));
12218 else
12220 emit_move_insn (reg10, allocate_rtx);
12221 emit_move_insn (reg11, GEN_INT (args_size));
12222 use_reg (&call_fusage, reg11);
12225 use_reg (&call_fusage, reg10);
12227 else
12229 emit_insn (gen_push (GEN_INT (args_size)));
12230 emit_insn (gen_push (allocate_rtx));
12232 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
12233 GEN_INT (UNITS_PER_WORD), constm1_rtx,
12234 NULL_RTX, false);
12235 add_function_usage_to (call_insn, call_fusage);
12237 /* In order to make call/return prediction work right, we now need
12238 to execute a return instruction. See
12239 libgcc/config/i386/morestack.S for the details on how this works.
12241 For flow purposes gcc must not see this as a return
12242 instruction--we need control flow to continue at the subsequent
12243 label. Therefore, we use an unspec. */
12244 gcc_assert (crtl->args.pops_args < 65536);
12245 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
12247 /* If we are in 64-bit mode and this function uses a static chain,
12248 we saved %r10 in %rax before calling __morestack. */
12249 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
12250 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
12251 gen_rtx_REG (word_mode, AX_REG));
12253 /* If this function calls va_start, we need to store a pointer to
12254 the arguments on the old stack, because they may not have been
12255 all copied to the new stack. At this point the old stack can be
12256 found at the frame pointer value used by __morestack, because
12257 __morestack has set that up before calling back to us. Here we
12258 store that pointer in a scratch register, and in
12259 ix86_expand_prologue we store the scratch register in a stack
12260 slot. */
12261 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12263 unsigned int scratch_regno;
12264 rtx frame_reg;
12265 int words;
12267 scratch_regno = split_stack_prologue_scratch_regno ();
12268 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12269 frame_reg = gen_rtx_REG (Pmode, BP_REG);
12271 /* 64-bit:
12272 fp -> old fp value
12273 return address within this function
12274 return address of caller of this function
12275 stack arguments
12276 So we add three words to get to the stack arguments.
12278 32-bit:
12279 fp -> old fp value
12280 return address within this function
12281 first argument to __morestack
12282 second argument to __morestack
12283 return address of caller of this function
12284 stack arguments
12285 So we add five words to get to the stack arguments.
12287 words = TARGET_64BIT ? 3 : 5;
12288 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12289 gen_rtx_PLUS (Pmode, frame_reg,
12290 GEN_INT (words * UNITS_PER_WORD))));
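      /* A worked example of the layout described above, assuming the usual
	 word sizes: in 64-bit mode UNITS_PER_WORD is 8, so the scratch
	 register gets %rbp + 24; in 32-bit mode UNITS_PER_WORD is 4, so it
	 gets %ebp + 20.  Either way it then points at this function's
	 stack arguments.  */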
12292 varargs_label = gen_label_rtx ();
12293 emit_jump_insn (gen_jump (varargs_label));
12294 JUMP_LABEL (get_last_insn ()) = varargs_label;
12296 emit_barrier ();
12299 emit_label (label);
12300 LABEL_NUSES (label) = 1;
12302 /* If this function calls va_start, we now have to set the scratch
12303 register for the case where we do not call __morestack. In this
12304 case we need to set it based on the stack pointer. */
12305 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12307 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12308 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12309 GEN_INT (UNITS_PER_WORD))));
12311 emit_label (varargs_label);
12312 LABEL_NUSES (varargs_label) = 1;
12316 /* We may have to tell the dataflow pass that the split stack prologue
12317 is initializing a scratch register. */
12319 static void
12320 ix86_live_on_entry (bitmap regs)
12322 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12324 gcc_assert (flag_split_stack);
12325 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
12329 /* Extract the parts of an RTL expression that is a valid memory address
12330 for an instruction. Return 0 if the structure of the address is
12331 grossly off. Return -1 if the address contains ASHIFT, so it is not
12332 strictly valid, but it is still used for computing the length of the lea instruction. */
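/* An illustrative decomposition (a sketch, not an exhaustive list of the
   accepted forms): the address

     (plus:SI (plus:SI (mult:SI (reg:SI bx) (const_int 4))
		       (reg:SI si))
	      (const_int 12))

   fills OUT with base = si, index = bx, scale = 4, disp = 12, i.e. the
   operand written 12(%esi,%ebx,4) in AT&T syntax.  */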
12335 ix86_decompose_address (rtx addr, struct ix86_address *out)
12337 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
12338 rtx base_reg, index_reg;
12339 HOST_WIDE_INT scale = 1;
12340 rtx scale_rtx = NULL_RTX;
12341 rtx tmp;
12342 int retval = 1;
12343 enum ix86_address_seg seg = SEG_DEFAULT;
12345 /* Allow zero-extended SImode addresses;
12346 they will be emitted with the addr32 prefix. */
12347 if (TARGET_64BIT && GET_MODE (addr) == DImode)
12349 if (GET_CODE (addr) == ZERO_EXTEND
12350 && GET_MODE (XEXP (addr, 0)) == SImode)
12352 addr = XEXP (addr, 0);
12353 if (CONST_INT_P (addr))
12354 return 0;
12356 else if (GET_CODE (addr) == AND
12357 && const_32bit_mask (XEXP (addr, 1), DImode))
12359 addr = simplify_gen_subreg (SImode, XEXP (addr, 0), DImode, 0);
12360 if (addr == NULL_RTX)
12361 return 0;
12363 if (CONST_INT_P (addr))
12364 return 0;
12368 /* Allow SImode subregs of DImode addresses;
12369 they will be emitted with the addr32 prefix. */
12370 if (TARGET_64BIT && GET_MODE (addr) == SImode)
12372 if (GET_CODE (addr) == SUBREG
12373 && GET_MODE (SUBREG_REG (addr)) == DImode)
12375 addr = SUBREG_REG (addr);
12376 if (CONST_INT_P (addr))
12377 return 0;
12381 if (REG_P (addr))
12382 base = addr;
12383 else if (GET_CODE (addr) == SUBREG)
12385 if (REG_P (SUBREG_REG (addr)))
12386 base = addr;
12387 else
12388 return 0;
12390 else if (GET_CODE (addr) == PLUS)
12392 rtx addends[4], op;
12393 int n = 0, i;
12395 op = addr;
12398 if (n >= 4)
12399 return 0;
12400 addends[n++] = XEXP (op, 1);
12401 op = XEXP (op, 0);
12403 while (GET_CODE (op) == PLUS);
12404 if (n >= 4)
12405 return 0;
12406 addends[n] = op;
12408 for (i = n; i >= 0; --i)
12410 op = addends[i];
12411 switch (GET_CODE (op))
12413 case MULT:
12414 if (index)
12415 return 0;
12416 index = XEXP (op, 0);
12417 scale_rtx = XEXP (op, 1);
12418 break;
12420 case ASHIFT:
12421 if (index)
12422 return 0;
12423 index = XEXP (op, 0);
12424 tmp = XEXP (op, 1);
12425 if (!CONST_INT_P (tmp))
12426 return 0;
12427 scale = INTVAL (tmp);
12428 if ((unsigned HOST_WIDE_INT) scale > 3)
12429 return 0;
12430 scale = 1 << scale;
12431 break;
12433 case ZERO_EXTEND:
12434 op = XEXP (op, 0);
12435 if (GET_CODE (op) != UNSPEC)
12436 return 0;
12437 /* FALLTHRU */
12439 case UNSPEC:
12440 if (XINT (op, 1) == UNSPEC_TP
12441 && TARGET_TLS_DIRECT_SEG_REFS
12442 && seg == SEG_DEFAULT)
12443 seg = DEFAULT_TLS_SEG_REG;
12444 else
12445 return 0;
12446 break;
12448 case SUBREG:
12449 if (!REG_P (SUBREG_REG (op)))
12450 return 0;
12451 /* FALLTHRU */
12453 case REG:
12454 if (!base)
12455 base = op;
12456 else if (!index)
12457 index = op;
12458 else
12459 return 0;
12460 break;
12462 case CONST:
12463 case CONST_INT:
12464 case SYMBOL_REF:
12465 case LABEL_REF:
12466 if (disp)
12467 return 0;
12468 disp = op;
12469 break;
12471 default:
12472 return 0;
12476 else if (GET_CODE (addr) == MULT)
12478 index = XEXP (addr, 0); /* index*scale */
12479 scale_rtx = XEXP (addr, 1);
12481 else if (GET_CODE (addr) == ASHIFT)
12483 /* We're called for lea too, which implements ashift on occasion. */
12484 index = XEXP (addr, 0);
12485 tmp = XEXP (addr, 1);
12486 if (!CONST_INT_P (tmp))
12487 return 0;
12488 scale = INTVAL (tmp);
12489 if ((unsigned HOST_WIDE_INT) scale > 3)
12490 return 0;
12491 scale = 1 << scale;
12492 retval = -1;
12494 else
12495 disp = addr; /* displacement */
12497 if (index)
12499 if (REG_P (index))
12501 else if (GET_CODE (index) == SUBREG
12502 && REG_P (SUBREG_REG (index)))
12504 else
12505 return 0;
12508 /* Extract the integral value of scale. */
12509 if (scale_rtx)
12511 if (!CONST_INT_P (scale_rtx))
12512 return 0;
12513 scale = INTVAL (scale_rtx);
12516 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
12517 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
12519 /* Avoid useless 0 displacement. */
12520 if (disp == const0_rtx && (base || index))
12521 disp = NULL_RTX;
12523 /* Allow arg pointer and stack pointer as index if there is no scaling. */
12524 if (base_reg && index_reg && scale == 1
12525 && (index_reg == arg_pointer_rtx
12526 || index_reg == frame_pointer_rtx
12527 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
12529 rtx tmp;
12530 tmp = base, base = index, index = tmp;
12531 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
12534 /* Special case: %ebp cannot be encoded as a base without a displacement.
12535 Similarly %r13. */
12536 if (!disp
12537 && base_reg
12538 && (base_reg == hard_frame_pointer_rtx
12539 || base_reg == frame_pointer_rtx
12540 || base_reg == arg_pointer_rtx
12541 || (REG_P (base_reg)
12542 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
12543 || REGNO (base_reg) == R13_REG))))
12544 disp = const0_rtx;
12546 /* Special case: on K6, [%esi] causes the instruction to be vector decoded.
12547 Avoid this by transforming to [%esi+0].
12548 Reload calls address legitimization without cfun defined, so we need
12549 to test cfun for being non-NULL. */
12550 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
12551 && base_reg && !index_reg && !disp
12552 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
12553 disp = const0_rtx;
12555 /* Special case: encode reg+reg instead of reg*2. */
12556 if (!base && index && scale == 2)
12557 base = index, base_reg = index_reg, scale = 1;
12559 /* Special case: scaling cannot be encoded without base or displacement. */
12560 if (!base && !disp && index && scale != 1)
12561 disp = const0_rtx;
12563 out->base = base;
12564 out->index = index;
12565 out->disp = disp;
12566 out->scale = scale;
12567 out->seg = seg;
12569 return retval;
12572 /* Return cost of the memory address x.
12573 For i386, it is better to use a complex address than let gcc copy
12574 the address into a reg and make a new pseudo. But not if the address
12575 requires two regs - that would mean more pseudos with longer
12576 lifetimes. */
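/* For instance (a sketch of the bookkeeping below): an address built from
   hard registers only, such as (%eax), keeps the base cost of 1, while an
   address needing two distinct pseudo registers for base and index is
   charged twice more and ends up with a cost of 3.  */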
12577 static int
12578 ix86_address_cost (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED,
12579 addr_space_t as ATTRIBUTE_UNUSED,
12580 bool speed ATTRIBUTE_UNUSED)
12582 struct ix86_address parts;
12583 int cost = 1;
12584 int ok = ix86_decompose_address (x, &parts);
12586 gcc_assert (ok);
12588 if (parts.base && GET_CODE (parts.base) == SUBREG)
12589 parts.base = SUBREG_REG (parts.base);
12590 if (parts.index && GET_CODE (parts.index) == SUBREG)
12591 parts.index = SUBREG_REG (parts.index);
12593 /* Attempt to minimize number of registers in the address. */
12594 if ((parts.base
12595 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
12596 || (parts.index
12597 && (!REG_P (parts.index)
12598 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
12599 cost++;
12601 if (parts.base
12602 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
12603 && parts.index
12604 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
12605 && parts.base != parts.index)
12606 cost++;
12608 /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
12609 since its predecode logic can't detect the length of instructions
12610 and it degenerates to vector decoded. Increase cost of such
12611 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
12612 to split such addresses or even refuse such addresses at all.
12614 Following addressing modes are affected:
12615 [base+scale*index]
12616 [scale*index+disp]
12617 [base+index]
12619 The first and last case may be avoidable by explicitly coding the zero into
12620 the memory address, but I don't have an AMD-K6 machine handy to check this
12621 theory. */
12623 if (TARGET_K6
12624 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
12625 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
12626 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
12627 cost += 10;
12629 return cost;
12632 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
12633 this is used to form addresses to local data when -fPIC is in
12634 use. */
12636 static bool
12637 darwin_local_data_pic (rtx disp)
12639 return (GET_CODE (disp) == UNSPEC
12640 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
12643 /* Determine if a given RTX is a valid constant. We already know this
12644 satisfies CONSTANT_P. */
12646 static bool
12647 ix86_legitimate_constant_p (enum machine_mode mode, rtx x)
12649 /* Pointer bounds constants are not valid. */
12650 if (POINTER_BOUNDS_MODE_P (mode))
12651 return false;
12653 switch (GET_CODE (x))
12655 case CONST:
12656 x = XEXP (x, 0);
12658 if (GET_CODE (x) == PLUS)
12660 if (!CONST_INT_P (XEXP (x, 1)))
12661 return false;
12662 x = XEXP (x, 0);
12665 if (TARGET_MACHO && darwin_local_data_pic (x))
12666 return true;
12668 /* Only some unspecs are valid as "constants". */
12669 if (GET_CODE (x) == UNSPEC)
12670 switch (XINT (x, 1))
12672 case UNSPEC_GOT:
12673 case UNSPEC_GOTOFF:
12674 case UNSPEC_PLTOFF:
12675 return TARGET_64BIT;
12676 case UNSPEC_TPOFF:
12677 case UNSPEC_NTPOFF:
12678 x = XVECEXP (x, 0, 0);
12679 return (GET_CODE (x) == SYMBOL_REF
12680 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
12681 case UNSPEC_DTPOFF:
12682 x = XVECEXP (x, 0, 0);
12683 return (GET_CODE (x) == SYMBOL_REF
12684 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
12685 default:
12686 return false;
12689 /* We must have drilled down to a symbol. */
12690 if (GET_CODE (x) == LABEL_REF)
12691 return true;
12692 if (GET_CODE (x) != SYMBOL_REF)
12693 return false;
12694 /* FALLTHRU */
12696 case SYMBOL_REF:
12697 /* TLS symbols are never valid. */
12698 if (SYMBOL_REF_TLS_MODEL (x))
12699 return false;
12701 /* DLLIMPORT symbols are never valid. */
12702 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12703 && SYMBOL_REF_DLLIMPORT_P (x))
12704 return false;
12706 #if TARGET_MACHO
12707 /* mdynamic-no-pic */
12708 if (MACHO_DYNAMIC_NO_PIC_P)
12709 return machopic_symbol_defined_p (x);
12710 #endif
12711 break;
12713 case CONST_DOUBLE:
12714 if (GET_MODE (x) == TImode
12715 && x != CONST0_RTX (TImode)
12716 && !TARGET_64BIT)
12717 return false;
12718 break;
12720 case CONST_VECTOR:
12721 if (!standard_sse_constant_p (x))
12722 return false;
12724 default:
12725 break;
12728 /* Otherwise we handle everything else in the move patterns. */
12729 return true;
12732 /* Determine if it's legal to put X into the constant pool. This
12733 is not possible for the address of thread-local symbols, which
12734 is checked above. */
12736 static bool
12737 ix86_cannot_force_const_mem (enum machine_mode mode, rtx x)
12739 /* We can always put integral constants and vectors in memory. */
12740 switch (GET_CODE (x))
12742 case CONST_INT:
12743 case CONST_DOUBLE:
12744 case CONST_VECTOR:
12745 return false;
12747 default:
12748 break;
12750 return !ix86_legitimate_constant_p (mode, x);
12753 /* Nonzero if the symbol is marked as dllimport or as a stub-variable,
12754 otherwise zero. */
12756 static bool
12757 is_imported_p (rtx x)
12759 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
12760 || GET_CODE (x) != SYMBOL_REF)
12761 return false;
12763 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
12767 /* Nonzero if the constant value X is a legitimate general operand
12768 when generating PIC code. It is given that flag_pic is on and
12769 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
12771 bool
12772 legitimate_pic_operand_p (rtx x)
12774 rtx inner;
12776 switch (GET_CODE (x))
12778 case CONST:
12779 inner = XEXP (x, 0);
12780 if (GET_CODE (inner) == PLUS
12781 && CONST_INT_P (XEXP (inner, 1)))
12782 inner = XEXP (inner, 0);
12784 /* Only some unspecs are valid as "constants". */
12785 if (GET_CODE (inner) == UNSPEC)
12786 switch (XINT (inner, 1))
12788 case UNSPEC_GOT:
12789 case UNSPEC_GOTOFF:
12790 case UNSPEC_PLTOFF:
12791 return TARGET_64BIT;
12792 case UNSPEC_TPOFF:
12793 x = XVECEXP (inner, 0, 0);
12794 return (GET_CODE (x) == SYMBOL_REF
12795 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
12796 case UNSPEC_MACHOPIC_OFFSET:
12797 return legitimate_pic_address_disp_p (x);
12798 default:
12799 return false;
12801 /* FALLTHRU */
12803 case SYMBOL_REF:
12804 case LABEL_REF:
12805 return legitimate_pic_address_disp_p (x);
12807 default:
12808 return true;
12812 /* Determine if a given CONST RTX is a valid memory displacement
12813 in PIC mode. */
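/* For example (a sketch of a displacement this predicate accepts in 32-bit
   PIC code): a reference to local data is expressed as

     (const (unspec [(symbol_ref "x")] UNSPEC_GOTOFF))

   i.e. the @GOTOFF offset of "x" from the GOT base, handled by the
   UNSPEC_GOTOFF case below.  */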
12815 bool
12816 legitimate_pic_address_disp_p (rtx disp)
12818 bool saw_plus;
12820 /* In 64bit mode we can allow direct addresses of symbols and labels
12821 when they are not dynamic symbols. */
12822 if (TARGET_64BIT)
12824 rtx op0 = disp, op1;
12826 switch (GET_CODE (disp))
12828 case LABEL_REF:
12829 return true;
12831 case CONST:
12832 if (GET_CODE (XEXP (disp, 0)) != PLUS)
12833 break;
12834 op0 = XEXP (XEXP (disp, 0), 0);
12835 op1 = XEXP (XEXP (disp, 0), 1);
12836 if (!CONST_INT_P (op1)
12837 || INTVAL (op1) >= 16*1024*1024
12838 || INTVAL (op1) < -16*1024*1024)
12839 break;
12840 if (GET_CODE (op0) == LABEL_REF)
12841 return true;
12842 if (GET_CODE (op0) == CONST
12843 && GET_CODE (XEXP (op0, 0)) == UNSPEC
12844 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
12845 return true;
12846 if (GET_CODE (op0) == UNSPEC
12847 && XINT (op0, 1) == UNSPEC_PCREL)
12848 return true;
12849 if (GET_CODE (op0) != SYMBOL_REF)
12850 break;
12851 /* FALLTHRU */
12853 case SYMBOL_REF:
12854 /* TLS references should always be enclosed in UNSPEC.
12855 The dllimported symbol always needs to be resolved. */
12856 if (SYMBOL_REF_TLS_MODEL (op0)
12857 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
12858 return false;
12860 if (TARGET_PECOFF)
12862 if (is_imported_p (op0))
12863 return true;
12865 if (SYMBOL_REF_FAR_ADDR_P (op0)
12866 || !SYMBOL_REF_LOCAL_P (op0))
12867 break;
12869 /* Function symbols need to be resolved only for the
12870 large model.
12871 For the small model we don't need to resolve anything
12872 here. */
12873 if ((ix86_cmodel != CM_LARGE_PIC
12874 && SYMBOL_REF_FUNCTION_P (op0))
12875 || ix86_cmodel == CM_SMALL_PIC)
12876 return true;
12877 /* Non-external symbols don't need to be resolved for
12878 the large and medium models. */
12879 if ((ix86_cmodel == CM_LARGE_PIC
12880 || ix86_cmodel == CM_MEDIUM_PIC)
12881 && !SYMBOL_REF_EXTERNAL_P (op0))
12882 return true;
12884 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
12885 && SYMBOL_REF_LOCAL_P (op0)
12886 && ix86_cmodel != CM_LARGE_PIC)
12887 return true;
12888 break;
12890 default:
12891 break;
12894 if (GET_CODE (disp) != CONST)
12895 return false;
12896 disp = XEXP (disp, 0);
12898 if (TARGET_64BIT)
12900 /* It is unsafe to allow PLUS expressions. This limits the allowed
12901 distance of GOT tables. We should not need these anyway. */
12902 if (GET_CODE (disp) != UNSPEC
12903 || (XINT (disp, 1) != UNSPEC_GOTPCREL
12904 && XINT (disp, 1) != UNSPEC_GOTOFF
12905 && XINT (disp, 1) != UNSPEC_PCREL
12906 && XINT (disp, 1) != UNSPEC_PLTOFF))
12907 return false;
12909 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
12910 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
12911 return false;
12912 return true;
12915 saw_plus = false;
12916 if (GET_CODE (disp) == PLUS)
12918 if (!CONST_INT_P (XEXP (disp, 1)))
12919 return false;
12920 disp = XEXP (disp, 0);
12921 saw_plus = true;
12924 if (TARGET_MACHO && darwin_local_data_pic (disp))
12925 return true;
12927 if (GET_CODE (disp) != UNSPEC)
12928 return false;
12930 switch (XINT (disp, 1))
12932 case UNSPEC_GOT:
12933 if (saw_plus)
12934 return false;
12935 /* We need to check for both symbols and labels because VxWorks loads
12936 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
12937 details. */
12938 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12939 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
12940 case UNSPEC_GOTOFF:
12941 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
12942 While the ABI also specifies a 32bit relocation, we don't produce it in
12943 the small PIC model at all. */
12944 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12945 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
12946 && !TARGET_64BIT)
12947 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
12948 return false;
12949 case UNSPEC_GOTTPOFF:
12950 case UNSPEC_GOTNTPOFF:
12951 case UNSPEC_INDNTPOFF:
12952 if (saw_plus)
12953 return false;
12954 disp = XVECEXP (disp, 0, 0);
12955 return (GET_CODE (disp) == SYMBOL_REF
12956 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
12957 case UNSPEC_NTPOFF:
12958 disp = XVECEXP (disp, 0, 0);
12959 return (GET_CODE (disp) == SYMBOL_REF
12960 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
12961 case UNSPEC_DTPOFF:
12962 disp = XVECEXP (disp, 0, 0);
12963 return (GET_CODE (disp) == SYMBOL_REF
12964 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
12967 return false;
12970 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
12971 replace the input X, or the original X if no replacement is called for.
12972 The output parameter *WIN is 1 if the calling macro should goto WIN,
12973 0 if it should not. */
12975 bool
12976 ix86_legitimize_reload_address (rtx x,
12977 enum machine_mode mode ATTRIBUTE_UNUSED,
12978 int opnum, int type,
12979 int ind_levels ATTRIBUTE_UNUSED)
12981 /* Reload can generate:
12983 (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
12984 (reg:DI 97))
12985 (reg:DI 2 cx))
12987 This RTX is rejected by ix86_legitimate_address_p due to the
12988 non-strictness of base register 97. Following this rejection,
12989 reload pushes all three components into separate registers,
12990 creating an invalid memory address RTX.
12992 The following code reloads only the invalid part of the
12993 memory address RTX. */
12995 if (GET_CODE (x) == PLUS
12996 && REG_P (XEXP (x, 1))
12997 && GET_CODE (XEXP (x, 0)) == PLUS
12998 && REG_P (XEXP (XEXP (x, 0), 1)))
13000 rtx base, index;
13001 bool something_reloaded = false;
13003 base = XEXP (XEXP (x, 0), 1);
13004 if (!REG_OK_FOR_BASE_STRICT_P (base))
13006 push_reload (base, NULL_RTX, &XEXP (XEXP (x, 0), 1), NULL,
13007 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
13008 opnum, (enum reload_type) type);
13009 something_reloaded = true;
13012 index = XEXP (x, 1);
13013 if (!REG_OK_FOR_INDEX_STRICT_P (index))
13015 push_reload (index, NULL_RTX, &XEXP (x, 1), NULL,
13016 INDEX_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
13017 opnum, (enum reload_type) type);
13018 something_reloaded = true;
13021 gcc_assert (something_reloaded);
13022 return true;
13025 return false;
13028 /* Determine if OP is a suitable RTX for an address register.
13029 Return the naked register if a register or a register subreg is
13030 found, otherwise return NULL_RTX. */
13032 static rtx
13033 ix86_validate_address_register (rtx op)
13035 enum machine_mode mode = GET_MODE (op);
13037 /* Only SImode or DImode registers can form the address. */
13038 if (mode != SImode && mode != DImode)
13039 return NULL_RTX;
13041 if (REG_P (op))
13042 return op;
13043 else if (GET_CODE (op) == SUBREG)
13045 rtx reg = SUBREG_REG (op);
13047 if (!REG_P (reg))
13048 return NULL_RTX;
13050 mode = GET_MODE (reg);
13052 /* Don't allow SUBREGs that span more than a word. It can
13053 lead to spill failures when the register is one word out
13054 of a two word structure. */
13055 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
13056 return NULL_RTX;
13058 /* Allow only SUBREGs of non-eliminable hard registers. */
13059 if (register_no_elim_operand (reg, mode))
13060 return reg;
13063 /* Op is not a register. */
13064 return NULL_RTX;
13067 /* Recognizes RTL expressions that are valid memory addresses for an
13068 instruction. The MODE argument is the machine mode for the MEM
13069 expression that wants to use this address.
13071 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
13072 convert common non-canonical forms to canonical form so that they will
13073 be recognized. */
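/* A quick sketch of what passes the checks below: an address such as

     (plus:DI (plus:DI (mult:DI (reg:DI) (const_int 8)) (reg:DI))
	      (const_int 16))

   is accepted (base + index*8 + 16), whereas the same address with a
   scale of 3 is rejected, since only scales of 1, 2, 4 and 8 can be
   encoded.  */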
13075 static bool
13076 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
13077 rtx addr, bool strict)
13079 struct ix86_address parts;
13080 rtx base, index, disp;
13081 HOST_WIDE_INT scale;
13082 enum ix86_address_seg seg;
13084 if (ix86_decompose_address (addr, &parts) <= 0)
13085 /* Decomposition failed. */
13086 return false;
13088 base = parts.base;
13089 index = parts.index;
13090 disp = parts.disp;
13091 scale = parts.scale;
13092 seg = parts.seg;
13094 /* Validate base register. */
13095 if (base)
13097 rtx reg = ix86_validate_address_register (base);
13099 if (reg == NULL_RTX)
13100 return false;
13102 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
13103 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
13104 /* Base is not valid. */
13105 return false;
13108 /* Validate index register. */
13109 if (index)
13111 rtx reg = ix86_validate_address_register (index);
13113 if (reg == NULL_RTX)
13114 return false;
13116 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
13117 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
13118 /* Index is not valid. */
13119 return false;
13122 /* Index and base should have the same mode. */
13123 if (base && index
13124 && GET_MODE (base) != GET_MODE (index))
13125 return false;
13127 /* Address override works only on the (%reg) part of %fs:(%reg). */
13128 if (seg != SEG_DEFAULT
13129 && ((base && GET_MODE (base) != word_mode)
13130 || (index && GET_MODE (index) != word_mode)))
13131 return false;
13133 /* Validate scale factor. */
13134 if (scale != 1)
13136 if (!index)
13137 /* Scale without index. */
13138 return false;
13140 if (scale != 2 && scale != 4 && scale != 8)
13141 /* Scale is not a valid multiplier. */
13142 return false;
13145 /* Validate displacement. */
13146 if (disp)
13148 if (GET_CODE (disp) == CONST
13149 && GET_CODE (XEXP (disp, 0)) == UNSPEC
13150 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
13151 switch (XINT (XEXP (disp, 0), 1))
13153 /* Refuse GOTOFF and GOT in 64bit mode since they are always 64bit when
13154 used. While the ABI also specifies 32bit relocations, we don't produce
13155 them at all and use IP relative addressing instead. */
13156 case UNSPEC_GOT:
13157 case UNSPEC_GOTOFF:
13158 gcc_assert (flag_pic);
13159 if (!TARGET_64BIT)
13160 goto is_legitimate_pic;
13162 /* 64bit address unspec. */
13163 return false;
13165 case UNSPEC_GOTPCREL:
13166 case UNSPEC_PCREL:
13167 gcc_assert (flag_pic);
13168 goto is_legitimate_pic;
13170 case UNSPEC_GOTTPOFF:
13171 case UNSPEC_GOTNTPOFF:
13172 case UNSPEC_INDNTPOFF:
13173 case UNSPEC_NTPOFF:
13174 case UNSPEC_DTPOFF:
13175 break;
13177 case UNSPEC_STACK_CHECK:
13178 gcc_assert (flag_split_stack);
13179 break;
13181 default:
13182 /* Invalid address unspec. */
13183 return false;
13186 else if (SYMBOLIC_CONST (disp)
13187 && (flag_pic
13188 || (TARGET_MACHO
13189 #if TARGET_MACHO
13190 && MACHOPIC_INDIRECT
13191 && !machopic_operand_p (disp)
13192 #endif
13196 is_legitimate_pic:
13197 if (TARGET_64BIT && (index || base))
13199 /* foo@dtpoff(%rX) is ok. */
13200 if (GET_CODE (disp) != CONST
13201 || GET_CODE (XEXP (disp, 0)) != PLUS
13202 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
13203 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
13204 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
13205 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
13206 /* Non-constant pic memory reference. */
13207 return false;
13209 else if ((!TARGET_MACHO || flag_pic)
13210 && ! legitimate_pic_address_disp_p (disp))
13211 /* Displacement is an invalid pic construct. */
13212 return false;
13213 #if TARGET_MACHO
13214 else if (MACHO_DYNAMIC_NO_PIC_P
13215 && !ix86_legitimate_constant_p (Pmode, disp))
13216 /* displacement must be referenced via non_lazy_pointer */
13217 return false;
13218 #endif
13220 /* This code used to verify that a symbolic pic displacement
13221 includes the pic_offset_table_rtx register.
13223 While this is a good idea, unfortunately these constructs may
13224 be created by the "adds using lea" optimization for incorrect
13225 code like:
13227 int a;
13228 int foo(int i)
13230 return *(&a+i);
13233 This code is nonsensical, but results in addressing the
13234 GOT table with a pic_offset_table_rtx base. We can't
13235 just refuse it easily, since it gets matched by the
13236 "addsi3" pattern, which later gets split to lea when the
13237 output register differs from the input. While this
13238 can be handled by a separate addsi pattern for this case
13239 that never results in lea, disabling this test seems to be
13240 the easier and correct fix for the crash. */
13242 else if (GET_CODE (disp) != LABEL_REF
13243 && !CONST_INT_P (disp)
13244 && (GET_CODE (disp) != CONST
13245 || !ix86_legitimate_constant_p (Pmode, disp))
13246 && (GET_CODE (disp) != SYMBOL_REF
13247 || !ix86_legitimate_constant_p (Pmode, disp)))
13248 /* Displacement is not constant. */
13249 return false;
13250 else if (TARGET_64BIT
13251 && !x86_64_immediate_operand (disp, VOIDmode))
13252 /* Displacement is out of range. */
13253 return false;
13254 /* In x32 mode, constant addresses are sign extended to 64bit, so
13255 we have to prevent addresses from 0x80000000 to 0xffffffff. */
13256 else if (TARGET_X32 && !(index || base)
13257 && CONST_INT_P (disp)
13258 && val_signbit_known_set_p (SImode, INTVAL (disp)))
13259 return false;
13262 /* Everything looks valid. */
13263 return true;
13266 /* Determine if a given RTX is a valid constant address. */
13268 bool
13269 constant_address_p (rtx x)
13271 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
13274 /* Return a unique alias set for the GOT. */
13276 static alias_set_type
13277 ix86_GOT_alias_set (void)
13279 static alias_set_type set = -1;
13280 if (set == -1)
13281 set = new_alias_set ();
13282 return set;
13285 /* Return a legitimate reference for ORIG (an address) using the
13286 register REG. If REG is 0, a new pseudo is generated.
13288 There are two types of references that must be handled:
13290 1. Global data references must load the address from the GOT, via
13291 the PIC reg. An insn is emitted to do this load, and the reg is
13292 returned.
13294 2. Static data references, constant pool addresses, and code labels
13295 compute the address as an offset from the GOT, whose base is in
13296 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
13297 differentiate them from global data objects. The returned
13298 address is the PIC reg + an unspec constant.
13300 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
13301 reg also appears in the address. */
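/* As a sketch of the two cases for 32-bit ELF PIC code: a global symbol
   becomes a load through the GOT,

     (mem (plus pic_reg (const (unspec [sym] UNSPEC_GOT))))

   while a local symbol is just an offset from the GOT base,

     (plus pic_reg (const (unspec [sym] UNSPEC_GOTOFF)))

   with pic_reg being pic_offset_table_rtx.  */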
13303 static rtx
13304 legitimize_pic_address (rtx orig, rtx reg)
13306 rtx addr = orig;
13307 rtx new_rtx = orig;
13309 #if TARGET_MACHO
13310 if (TARGET_MACHO && !TARGET_64BIT)
13312 if (reg == 0)
13313 reg = gen_reg_rtx (Pmode);
13314 /* Use the generic Mach-O PIC machinery. */
13315 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
13317 #endif
13319 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13321 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13322 if (tmp)
13323 return tmp;
13326 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
13327 new_rtx = addr;
13328 else if (TARGET_64BIT && !TARGET_PECOFF
13329 && ix86_cmodel != CM_SMALL_PIC && gotoff_operand (addr, Pmode))
13331 rtx tmpreg;
13332 /* This symbol may be referenced via a displacement from the PIC
13333 base address (@GOTOFF). */
13335 if (reload_in_progress)
13336 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13337 if (GET_CODE (addr) == CONST)
13338 addr = XEXP (addr, 0);
13339 if (GET_CODE (addr) == PLUS)
13341 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13342 UNSPEC_GOTOFF);
13343 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13345 else
13346 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13347 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13348 if (!reg)
13349 tmpreg = gen_reg_rtx (Pmode);
13350 else
13351 tmpreg = reg;
13352 emit_move_insn (tmpreg, new_rtx);
13354 if (reg != 0)
13356 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
13357 tmpreg, 1, OPTAB_DIRECT);
13358 new_rtx = reg;
13360 else
13361 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
13363 else if (!TARGET_64BIT && !TARGET_PECOFF && gotoff_operand (addr, Pmode))
13365 /* This symbol may be referenced via a displacement from the PIC
13366 base address (@GOTOFF). */
13368 if (reload_in_progress)
13369 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13370 if (GET_CODE (addr) == CONST)
13371 addr = XEXP (addr, 0);
13372 if (GET_CODE (addr) == PLUS)
13374 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13375 UNSPEC_GOTOFF);
13376 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13378 else
13379 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13380 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13381 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13383 if (reg != 0)
13385 emit_move_insn (reg, new_rtx);
13386 new_rtx = reg;
13389 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
13390 /* We can't use @GOTOFF for text labels on VxWorks;
13391 see gotoff_operand. */
13392 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
13394 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13395 if (tmp)
13396 return tmp;
13398 /* For x64 PE-COFF there is no GOT table, so we use the address
13399 directly. */
13400 if (TARGET_64BIT && TARGET_PECOFF)
13402 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
13403 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13405 if (reg == 0)
13406 reg = gen_reg_rtx (Pmode);
13407 emit_move_insn (reg, new_rtx);
13408 new_rtx = reg;
13410 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
13412 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
13413 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13414 new_rtx = gen_const_mem (Pmode, new_rtx);
13415 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13417 if (reg == 0)
13418 reg = gen_reg_rtx (Pmode);
13419 /* Use gen_movsi directly, otherwise the address is loaded
13420 into a register for CSE. We don't want to CSE these addresses;
13421 instead we CSE addresses from the GOT table, so skip this. */
13422 emit_insn (gen_movsi (reg, new_rtx));
13423 new_rtx = reg;
13425 else
13427 /* This symbol must be referenced via a load from the
13428 Global Offset Table (@GOT). */
13430 if (reload_in_progress)
13431 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13432 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
13433 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13434 if (TARGET_64BIT)
13435 new_rtx = force_reg (Pmode, new_rtx);
13436 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13437 new_rtx = gen_const_mem (Pmode, new_rtx);
13438 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13440 if (reg == 0)
13441 reg = gen_reg_rtx (Pmode);
13442 emit_move_insn (reg, new_rtx);
13443 new_rtx = reg;
13446 else
13448 if (CONST_INT_P (addr)
13449 && !x86_64_immediate_operand (addr, VOIDmode))
13451 if (reg)
13453 emit_move_insn (reg, addr);
13454 new_rtx = reg;
13456 else
13457 new_rtx = force_reg (Pmode, addr);
13459 else if (GET_CODE (addr) == CONST)
13461 addr = XEXP (addr, 0);
13463 /* We must match stuff we generate before. Assume the only
13464 unspecs that can get here are ours. Not that we could do
13465 anything with them anyway.... */
13466 if (GET_CODE (addr) == UNSPEC
13467 || (GET_CODE (addr) == PLUS
13468 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
13469 return orig;
13470 gcc_assert (GET_CODE (addr) == PLUS);
13472 if (GET_CODE (addr) == PLUS)
13474 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
13476 /* Check first to see if this is a constant offset from a @GOTOFF
13477 symbol reference. */
13478 if (!TARGET_PECOFF && gotoff_operand (op0, Pmode)
13479 && CONST_INT_P (op1))
13481 if (!TARGET_64BIT)
13483 if (reload_in_progress)
13484 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13485 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
13486 UNSPEC_GOTOFF);
13487 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
13488 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13489 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13491 if (reg != 0)
13493 emit_move_insn (reg, new_rtx);
13494 new_rtx = reg;
13497 else
13499 if (INTVAL (op1) < -16*1024*1024
13500 || INTVAL (op1) >= 16*1024*1024)
13502 if (!x86_64_immediate_operand (op1, Pmode))
13503 op1 = force_reg (Pmode, op1);
13504 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
13508 else
13510 rtx base = legitimize_pic_address (op0, reg);
13511 enum machine_mode mode = GET_MODE (base);
13512 new_rtx
13513 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
13515 if (CONST_INT_P (new_rtx))
13517 if (INTVAL (new_rtx) < -16*1024*1024
13518 || INTVAL (new_rtx) >= 16*1024*1024)
13520 if (!x86_64_immediate_operand (new_rtx, mode))
13521 new_rtx = force_reg (mode, new_rtx);
13522 new_rtx
13523 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
13525 else
13526 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
13528 else
13530 if (GET_CODE (new_rtx) == PLUS
13531 && CONSTANT_P (XEXP (new_rtx, 1)))
13533 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
13534 new_rtx = XEXP (new_rtx, 1);
13536 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
13541 return new_rtx;
13544 /* Load the thread pointer. If TO_REG is true, force it into a register. */
13546 static rtx
13547 get_thread_pointer (enum machine_mode tp_mode, bool to_reg)
13549 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
13551 if (GET_MODE (tp) != tp_mode)
13553 gcc_assert (GET_MODE (tp) == SImode);
13554 gcc_assert (tp_mode == DImode);
13556 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
13559 if (to_reg)
13560 tp = copy_to_mode_reg (tp_mode, tp);
13562 return tp;
13565 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13567 static GTY(()) rtx ix86_tls_symbol;
13569 static rtx
13570 ix86_tls_get_addr (void)
13572 if (!ix86_tls_symbol)
13574 const char *sym
13575 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
13576 ? "___tls_get_addr" : "__tls_get_addr");
13578 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
13581 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
13583 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
13584 UNSPEC_PLTOFF);
13585 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
13586 gen_rtx_CONST (Pmode, unspec));
13589 return ix86_tls_symbol;
13592 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13594 static GTY(()) rtx ix86_tls_module_base_symbol;
13597 ix86_tls_module_base (void)
13599 if (!ix86_tls_module_base_symbol)
13601 ix86_tls_module_base_symbol
13602 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
13604 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13605 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13608 return ix86_tls_module_base_symbol;
13611 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
13612 false if we expect this to be used for a memory address and true if
13613 we expect to load the address into a register. */
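/* As a brief sketch of the simplest case handled below: for the
   local-exec model the variable's address is just the thread pointer
   plus a link-time constant, so the function builds

     (plus tp (const (unspec [sym] UNSPEC_NTPOFF)))

   (or subtracts a UNSPEC_TPOFF offset on non-GNU TLS targets).  */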
13615 static rtx
13616 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
13618 rtx dest, base, off;
13619 rtx pic = NULL_RTX, tp = NULL_RTX;
13620 enum machine_mode tp_mode = Pmode;
13621 int type;
13623 /* Fall back to the global dynamic model if the tool chain cannot
13624 support local dynamic. */
13625 if (TARGET_SUN_TLS && !TARGET_64BIT
13626 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
13627 && model == TLS_MODEL_LOCAL_DYNAMIC)
13628 model = TLS_MODEL_GLOBAL_DYNAMIC;
13630 switch (model)
13632 case TLS_MODEL_GLOBAL_DYNAMIC:
13633 dest = gen_reg_rtx (Pmode);
13635 if (!TARGET_64BIT)
13637 if (flag_pic && !TARGET_PECOFF)
13638 pic = pic_offset_table_rtx;
13639 else
13641 pic = gen_reg_rtx (Pmode);
13642 emit_insn (gen_set_got (pic));
13646 if (TARGET_GNU2_TLS)
13648 if (TARGET_64BIT)
13649 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
13650 else
13651 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
13653 tp = get_thread_pointer (Pmode, true);
13654 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
13656 if (GET_MODE (x) != Pmode)
13657 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13659 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
13661 else
13663 rtx caddr = ix86_tls_get_addr ();
13665 if (TARGET_64BIT)
13667 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13668 rtx insns;
13670 start_sequence ();
13671 emit_call_insn
13672 (ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
13673 insns = get_insns ();
13674 end_sequence ();
13676 if (GET_MODE (x) != Pmode)
13677 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13679 RTL_CONST_CALL_P (insns) = 1;
13680 emit_libcall_block (insns, dest, rax, x);
13682 else
13683 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
13685 break;
13687 case TLS_MODEL_LOCAL_DYNAMIC:
13688 base = gen_reg_rtx (Pmode);
13690 if (!TARGET_64BIT)
13692 if (flag_pic)
13693 pic = pic_offset_table_rtx;
13694 else
13696 pic = gen_reg_rtx (Pmode);
13697 emit_insn (gen_set_got (pic));
13701 if (TARGET_GNU2_TLS)
13703 rtx tmp = ix86_tls_module_base ();
13705 if (TARGET_64BIT)
13706 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
13707 else
13708 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
13710 tp = get_thread_pointer (Pmode, true);
13711 set_unique_reg_note (get_last_insn (), REG_EQUAL,
13712 gen_rtx_MINUS (Pmode, tmp, tp));
13714 else
13716 rtx caddr = ix86_tls_get_addr ();
13718 if (TARGET_64BIT)
13720 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13721 rtx insns, eqv;
13723 start_sequence ();
13724 emit_call_insn
13725 (ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
13726 insns = get_insns ();
13727 end_sequence ();
13729 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
13730 share the LD_BASE result with other LD model accesses. */
13731 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
13732 UNSPEC_TLS_LD_BASE);
13734 RTL_CONST_CALL_P (insns) = 1;
13735 emit_libcall_block (insns, base, rax, eqv);
13737 else
13738 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
13741 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
13742 off = gen_rtx_CONST (Pmode, off);
13744 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
13746 if (TARGET_GNU2_TLS)
13748 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
13750 if (GET_MODE (x) != Pmode)
13751 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13753 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
13755 break;
13757 case TLS_MODEL_INITIAL_EXEC:
13758 if (TARGET_64BIT)
13760 if (TARGET_SUN_TLS && !TARGET_X32)
13762 /* The Sun linker took the AMD64 TLS spec literally
13763 and can only handle %rax as the destination of the
13764 initial executable code sequence. */
13766 dest = gen_reg_rtx (DImode);
13767 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
13768 return dest;
13771 /* Generate DImode references to avoid %fs:(%reg32)
13772 problems and the linker IE->LE relaxation bug. */
13773 tp_mode = DImode;
13774 pic = NULL;
13775 type = UNSPEC_GOTNTPOFF;
13777 else if (flag_pic)
13779 if (reload_in_progress)
13780 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13781 pic = pic_offset_table_rtx;
13782 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
13784 else if (!TARGET_ANY_GNU_TLS)
13786 pic = gen_reg_rtx (Pmode);
13787 emit_insn (gen_set_got (pic));
13788 type = UNSPEC_GOTTPOFF;
13790 else
13792 pic = NULL;
13793 type = UNSPEC_INDNTPOFF;
13796 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
13797 off = gen_rtx_CONST (tp_mode, off);
13798 if (pic)
13799 off = gen_rtx_PLUS (tp_mode, pic, off);
13800 off = gen_const_mem (tp_mode, off);
13801 set_mem_alias_set (off, ix86_GOT_alias_set ());
13803 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13805 base = get_thread_pointer (tp_mode,
13806 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
13807 off = force_reg (tp_mode, off);
13808 return gen_rtx_PLUS (tp_mode, base, off);
13810 else
13812 base = get_thread_pointer (Pmode, true);
13813 dest = gen_reg_rtx (Pmode);
13814 emit_insn (ix86_gen_sub3 (dest, base, off));
13816 break;
13818 case TLS_MODEL_LOCAL_EXEC:
13819 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
13820 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13821 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
13822 off = gen_rtx_CONST (Pmode, off);
13824 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13826 base = get_thread_pointer (Pmode,
13827 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
13828 return gen_rtx_PLUS (Pmode, base, off);
13830 else
13832 base = get_thread_pointer (Pmode, true);
13833 dest = gen_reg_rtx (Pmode);
13834 emit_insn (ix86_gen_sub3 (dest, base, off));
13836 break;
13838 default:
13839 gcc_unreachable ();
13842 return dest;
13845 /* Create or return the unique __imp_DECL dllimport symbol corresponding
13846 to symbol DECL if BEIMPORT is true. Otherwise create or return the
13847 unique refptr-DECL symbol corresponding to symbol DECL. */
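/* For example (a sketch, assuming a plain C symbol "foo"): with BEIMPORT
   true the new decl's assembler name becomes "*__imp_foo" (or
   "*__imp__foo" when a user label prefix is in use), and with BEIMPORT
   false it becomes the "*refptr.foo" / "*.refptr.foo" stub variable
   instead.  */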
13849 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
13850 htab_t dllimport_map;
13852 static tree
13853 get_dllimport_decl (tree decl, bool beimport)
13855 struct tree_map *h, in;
13856 void **loc;
13857 const char *name;
13858 const char *prefix;
13859 size_t namelen, prefixlen;
13860 char *imp_name;
13861 tree to;
13862 rtx rtl;
13864 if (!dllimport_map)
13865 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
13867 in.hash = htab_hash_pointer (decl);
13868 in.base.from = decl;
13869 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
13870 h = (struct tree_map *) *loc;
13871 if (h)
13872 return h->to;
13874 *loc = h = ggc_alloc_tree_map ();
13875 h->hash = in.hash;
13876 h->base.from = decl;
13877 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
13878 VAR_DECL, NULL, ptr_type_node);
13879 DECL_ARTIFICIAL (to) = 1;
13880 DECL_IGNORED_P (to) = 1;
13881 DECL_EXTERNAL (to) = 1;
13882 TREE_READONLY (to) = 1;
13884 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
13885 name = targetm.strip_name_encoding (name);
13886 if (beimport)
13887 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
13888 ? "*__imp_" : "*__imp__";
13889 else
13890 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
13891 namelen = strlen (name);
13892 prefixlen = strlen (prefix);
13893 imp_name = (char *) alloca (namelen + prefixlen + 1);
13894 memcpy (imp_name, prefix, prefixlen);
13895 memcpy (imp_name + prefixlen, name, namelen + 1);
13897 name = ggc_alloc_string (imp_name, namelen + prefixlen);
13898 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
13899 SET_SYMBOL_REF_DECL (rtl, to);
13900 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
13901 if (!beimport)
13903 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
13904 #ifdef SUB_TARGET_RECORD_STUB
13905 SUB_TARGET_RECORD_STUB (name);
13906 #endif
13909 rtl = gen_const_mem (Pmode, rtl);
13910 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
13912 SET_DECL_RTL (to, rtl);
13913 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
13915 return to;
13918 /* Expand SYMBOL into its corresponding far-address symbol.
13919 WANT_REG is true if we require the result to be a register. */
13921 static rtx
13922 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
13924 tree imp_decl;
13925 rtx x;
13927 gcc_assert (SYMBOL_REF_DECL (symbol));
13928 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
13930 x = DECL_RTL (imp_decl);
13931 if (want_reg)
13932 x = force_reg (Pmode, x);
13933 return x;
13936 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
13937 true if we require the result to be a register. */
13939 static rtx
13940 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
13942 tree imp_decl;
13943 rtx x;
13945 gcc_assert (SYMBOL_REF_DECL (symbol));
13946 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
13948 x = DECL_RTL (imp_decl);
13949 if (want_reg)
13950 x = force_reg (Pmode, x);
13951 return x;
13954 /* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG
13955 is true if we require the result to be a register. */
13957 static rtx
13958 legitimize_pe_coff_symbol (rtx addr, bool inreg)
13960 if (!TARGET_PECOFF)
13961 return NULL_RTX;
13963 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13965 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
13966 return legitimize_dllimport_symbol (addr, inreg);
13967 if (GET_CODE (addr) == CONST
13968 && GET_CODE (XEXP (addr, 0)) == PLUS
13969 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
13970 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
13972 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
13973 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
13977 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
13978 return NULL_RTX;
13979 if (GET_CODE (addr) == SYMBOL_REF
13980 && !is_imported_p (addr)
13981 && SYMBOL_REF_EXTERNAL_P (addr)
13982 && SYMBOL_REF_DECL (addr))
13983 return legitimize_pe_coff_extern_decl (addr, inreg);
13985 if (GET_CODE (addr) == CONST
13986 && GET_CODE (XEXP (addr, 0)) == PLUS
13987 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
13988 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
13989 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
13990 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
13992 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
13993 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
13995 return NULL_RTX;
13998 /* Try machine-dependent ways of modifying an illegitimate address
13999 to be legitimate. If we find one, return the new, valid address.
14000 This macro is used in only one place: `memory_address' in explow.c.
14002 OLDX is the address as it was before break_out_memory_refs was called.
14003 In some cases it is useful to look at this to decide what needs to be done.
14005 It is always safe for this macro to do nothing. It exists to recognize
14006 opportunities to optimize the output.
14008 For the 80386, we handle X+REG by loading X into a register R and
14009 using R+REG. R will go in a general reg and indexing will be used.
14010 However, if REG is a broken-out memory address or multiplication,
14011 nothing needs to be done because REG can certainly go in a general reg.
14013 When -fpic is used, special handling is needed for symbolic references.
14014 See comments by legitimize_pic_address in i386.c for details. */
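/* One concrete canonicalization performed below (a sketch): a shifted
   index such as (ashift (reg) (const_int 2)) is rewritten as
   (mult (reg) (const_int 4)), the multiply form that the address
   decomposition and the lea patterns prefer.  */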
14016 static rtx
14017 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
14018 enum machine_mode mode)
14020 int changed = 0;
14021 unsigned log;
14023 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
14024 if (log)
14025 return legitimize_tls_address (x, (enum tls_model) log, false);
14026 if (GET_CODE (x) == CONST
14027 && GET_CODE (XEXP (x, 0)) == PLUS
14028 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
14029 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
14031 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
14032 (enum tls_model) log, false);
14033 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
14036 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
14038 rtx tmp = legitimize_pe_coff_symbol (x, true);
14039 if (tmp)
14040 return tmp;
14043 if (flag_pic && SYMBOLIC_CONST (x))
14044 return legitimize_pic_address (x, 0);
14046 #if TARGET_MACHO
14047 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
14048 return machopic_indirect_data_reference (x, 0);
14049 #endif
14051 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
14052 if (GET_CODE (x) == ASHIFT
14053 && CONST_INT_P (XEXP (x, 1))
14054 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
14056 changed = 1;
14057 log = INTVAL (XEXP (x, 1));
14058 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
14059 GEN_INT (1 << log));
14062 if (GET_CODE (x) == PLUS)
14064 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
14066 if (GET_CODE (XEXP (x, 0)) == ASHIFT
14067 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14068 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
14070 changed = 1;
14071 log = INTVAL (XEXP (XEXP (x, 0), 1));
14072 XEXP (x, 0) = gen_rtx_MULT (Pmode,
14073 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
14074 GEN_INT (1 << log));
14077 if (GET_CODE (XEXP (x, 1)) == ASHIFT
14078 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
14079 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
14081 changed = 1;
14082 log = INTVAL (XEXP (XEXP (x, 1), 1));
14083 XEXP (x, 1) = gen_rtx_MULT (Pmode,
14084 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
14085 GEN_INT (1 << log));
14088 /* Put multiply first if it isn't already. */
14089 if (GET_CODE (XEXP (x, 1)) == MULT)
14091 rtx tmp = XEXP (x, 0);
14092 XEXP (x, 0) = XEXP (x, 1);
14093 XEXP (x, 1) = tmp;
14094 changed = 1;
14097 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
14098 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
14099 created by virtual register instantiation, register elimination, and
14100 similar optimizations. */
14101 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
14103 changed = 1;
14104 x = gen_rtx_PLUS (Pmode,
14105 gen_rtx_PLUS (Pmode, XEXP (x, 0),
14106 XEXP (XEXP (x, 1), 0)),
14107 XEXP (XEXP (x, 1), 1));
14110 /* Canonicalize
14111 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
14112 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
14113 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
14114 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
14115 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
14116 && CONSTANT_P (XEXP (x, 1)))
14118 rtx constant;
14119 rtx other = NULL_RTX;
14121 if (CONST_INT_P (XEXP (x, 1)))
14123 constant = XEXP (x, 1);
14124 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
14126 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
14128 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
14129 other = XEXP (x, 1);
14131 else
14132 constant = 0;
14134 if (constant)
14136 changed = 1;
14137 x = gen_rtx_PLUS (Pmode,
14138 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
14139 XEXP (XEXP (XEXP (x, 0), 1), 0)),
14140 plus_constant (Pmode, other,
14141 INTVAL (constant)));
14145 if (changed && ix86_legitimate_address_p (mode, x, false))
14146 return x;
14148 if (GET_CODE (XEXP (x, 0)) == MULT)
14150 changed = 1;
14151 XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
14154 if (GET_CODE (XEXP (x, 1)) == MULT)
14156 changed = 1;
14157 XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
14160 if (changed
14161 && REG_P (XEXP (x, 1))
14162 && REG_P (XEXP (x, 0)))
14163 return x;
14165 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
14167 changed = 1;
14168 x = legitimize_pic_address (x, 0);
14171 if (changed && ix86_legitimate_address_p (mode, x, false))
14172 return x;
14174 if (REG_P (XEXP (x, 0)))
14176 rtx temp = gen_reg_rtx (Pmode);
14177 rtx val = force_operand (XEXP (x, 1), temp);
14178 if (val != temp)
14180 val = convert_to_mode (Pmode, val, 1);
14181 emit_move_insn (temp, val);
14184 XEXP (x, 1) = temp;
14185 return x;
14188 else if (REG_P (XEXP (x, 1)))
14190 rtx temp = gen_reg_rtx (Pmode);
14191 rtx val = force_operand (XEXP (x, 0), temp);
14192 if (val != temp)
14194 val = convert_to_mode (Pmode, val, 1);
14195 emit_move_insn (temp, val);
14198 XEXP (x, 0) = temp;
14199 return x;
14203 return x;
14206 /* Print an integer constant expression in assembler syntax. Addition
14207 and subtraction are the only arithmetic that may appear in these
14208 expressions. FILE is the stdio stream to write to, X is the rtx, and
14209 CODE is the operand print code from the output string. */
14211 static void
14212 output_pic_addr_const (FILE *file, rtx x, int code)
14214 char buf[256];
14216 switch (GET_CODE (x))
14218 case PC:
14219 gcc_assert (flag_pic);
14220 putc ('.', file);
14221 break;
14223 case SYMBOL_REF:
14224 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
14225 output_addr_const (file, x);
14226 else
14228 const char *name = XSTR (x, 0);
14230 /* Mark the decl as referenced so that cgraph will
14231 output the function. */
14232 if (SYMBOL_REF_DECL (x))
14233 mark_decl_referenced (SYMBOL_REF_DECL (x));
14235 #if TARGET_MACHO
14236 if (MACHOPIC_INDIRECT
14237 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
14238 name = machopic_indirection_name (x, /*stub_p=*/true);
14239 #endif
14240 assemble_name (file, name);
14242 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
14243 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
14244 fputs ("@PLT", file);
14245 break;
14247 case LABEL_REF:
14248 x = XEXP (x, 0);
14249 /* FALLTHRU */
14250 case CODE_LABEL:
14251 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
14252 assemble_name (asm_out_file, buf);
14253 break;
14255 case CONST_INT:
14256 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14257 break;
14259 case CONST:
14260 /* This used to output parentheses around the expression,
14261 but that does not work on the 386 (either ATT or BSD assembler). */
14262 output_pic_addr_const (file, XEXP (x, 0), code);
14263 break;
14265 case CONST_DOUBLE:
14266 if (GET_MODE (x) == VOIDmode)
14268 /* We can use %d if the number is <32 bits and positive. */
14269 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
14270 fprintf (file, "0x%lx%08lx",
14271 (unsigned long) CONST_DOUBLE_HIGH (x),
14272 (unsigned long) CONST_DOUBLE_LOW (x));
14273 else
14274 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
14276 else
14277 /* We can't handle floating point constants;
14278 TARGET_PRINT_OPERAND must handle them. */
14279 output_operand_lossage ("floating constant misused");
14280 break;
14282 case PLUS:
14283 /* Some assemblers need integer constants to appear first. */
14284 if (CONST_INT_P (XEXP (x, 0)))
14286 output_pic_addr_const (file, XEXP (x, 0), code);
14287 putc ('+', file);
14288 output_pic_addr_const (file, XEXP (x, 1), code);
14290 else
14292 gcc_assert (CONST_INT_P (XEXP (x, 1)));
14293 output_pic_addr_const (file, XEXP (x, 1), code);
14294 putc ('+', file);
14295 output_pic_addr_const (file, XEXP (x, 0), code);
14297 break;
14299 case MINUS:
14300 if (!TARGET_MACHO)
14301 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
14302 output_pic_addr_const (file, XEXP (x, 0), code);
14303 putc ('-', file);
14304 output_pic_addr_const (file, XEXP (x, 1), code);
14305 if (!TARGET_MACHO)
14306 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
14307 break;
14309 case UNSPEC:
14310 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
14312 bool f = i386_asm_output_addr_const_extra (file, x);
14313 gcc_assert (f);
14314 break;
14317 gcc_assert (XVECLEN (x, 0) == 1);
14318 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
14319 switch (XINT (x, 1))
14321 case UNSPEC_GOT:
14322 fputs ("@GOT", file);
14323 break;
14324 case UNSPEC_GOTOFF:
14325 fputs ("@GOTOFF", file);
14326 break;
14327 case UNSPEC_PLTOFF:
14328 fputs ("@PLTOFF", file);
14329 break;
14330 case UNSPEC_PCREL:
14331 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14332 "(%rip)" : "[rip]", file);
14333 break;
14334 case UNSPEC_GOTPCREL:
14335 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14336 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
14337 break;
14338 case UNSPEC_GOTTPOFF:
14339 /* FIXME: This might be @TPOFF in Sun ld too. */
14340 fputs ("@gottpoff", file);
14341 break;
14342 case UNSPEC_TPOFF:
14343 fputs ("@tpoff", file);
14344 break;
14345 case UNSPEC_NTPOFF:
14346 if (TARGET_64BIT)
14347 fputs ("@tpoff", file);
14348 else
14349 fputs ("@ntpoff", file);
14350 break;
14351 case UNSPEC_DTPOFF:
14352 fputs ("@dtpoff", file);
14353 break;
14354 case UNSPEC_GOTNTPOFF:
14355 if (TARGET_64BIT)
14356 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14357 "@gottpoff(%rip)": "@gottpoff[rip]", file);
14358 else
14359 fputs ("@gotntpoff", file);
14360 break;
14361 case UNSPEC_INDNTPOFF:
14362 fputs ("@indntpoff", file);
14363 break;
14364 #if TARGET_MACHO
14365 case UNSPEC_MACHOPIC_OFFSET:
14366 putc ('-', file);
14367 machopic_output_function_base_name (file);
14368 break;
14369 #endif
14370 default:
14371 output_operand_lossage ("invalid UNSPEC as operand");
14372 break;
14374 break;
14376 default:
14377 output_operand_lossage ("invalid expression as operand");
14381 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
14382 We need to emit DTP-relative relocations. */
14384 static void ATTRIBUTE_UNUSED
14385 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
14387 fputs (ASM_LONG, file);
14388 output_addr_const (file, x);
14389 fputs ("@dtpoff", file);
14390 switch (size)
14392 case 4:
14393 break;
14394 case 8:
14395 fputs (", 0", file);
14396 break;
14397 default:
14398 gcc_unreachable ();
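/* Illustrative note (not from the original source): with ASM_LONG being the
   ".long" directive, a 4-byte request above emits
       .long   foo@dtpoff
   while an 8-byte request emits
       .long   foo@dtpoff, 0
   i.e. the DTP-relative offset followed by a zero upper word.  */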
14402 /* Return true if X is a representation of the PIC register. This copes
14403 with calls from ix86_find_base_term, where the register might have
14404 been replaced by a cselib value. */
14406 static bool
14407 ix86_pic_register_p (rtx x)
14409 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
14410 return (pic_offset_table_rtx
14411 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
14412 else
14413 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
14416 /* Helper function for ix86_delegitimize_address.
14417 Attempt to delegitimize TLS local-exec accesses. */
14419 static rtx
14420 ix86_delegitimize_tls_address (rtx orig_x)
14422 rtx x = orig_x, unspec;
14423 struct ix86_address addr;
14425 if (!TARGET_TLS_DIRECT_SEG_REFS)
14426 return orig_x;
14427 if (MEM_P (x))
14428 x = XEXP (x, 0);
14429 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
14430 return orig_x;
14431 if (ix86_decompose_address (x, &addr) == 0
14432 || addr.seg != DEFAULT_TLS_SEG_REG
14433 || addr.disp == NULL_RTX
14434 || GET_CODE (addr.disp) != CONST)
14435 return orig_x;
14436 unspec = XEXP (addr.disp, 0);
14437 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
14438 unspec = XEXP (unspec, 0);
14439 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
14440 return orig_x;
14441 x = XVECEXP (unspec, 0, 0);
14442 gcc_assert (GET_CODE (x) == SYMBOL_REF);
14443 if (unspec != XEXP (addr.disp, 0))
14444 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
14445 if (addr.index)
14447 rtx idx = addr.index;
14448 if (addr.scale != 1)
14449 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
14450 x = gen_rtx_PLUS (Pmode, idx, x);
14452 if (addr.base)
14453 x = gen_rtx_PLUS (Pmode, addr.base, x);
14454 if (MEM_P (orig_x))
14455 x = replace_equiv_address_nv (orig_x, x);
14456 return x;
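/* Sketch of the rewrite performed above (illustrative, not from the original
   source): an address of the form
       base + index*scale + const (unspec [SYM] UNSPEC_NTPOFF) (+ offset)
   accessed through the TLS segment register is rebuilt as
       base + index*scale + SYM (+ offset)
   so that debug output and base-term analysis see the symbol directly.  */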
14459 /* In the name of slightly smaller debug output, and to cater to
14460 general assembler lossage, recognize PIC+GOTOFF and turn it back
14461 into a direct symbol reference.
14463 On Darwin, this is necessary to avoid a crash, because Darwin
14464 has a different PIC label for each routine but the DWARF debugging
14465 information is not associated with any particular routine, so it's
14466 necessary to remove references to the PIC label from RTL stored by
14467 the DWARF output code. */
14469 static rtx
14470 ix86_delegitimize_address (rtx x)
14472 rtx orig_x = delegitimize_mem_from_attrs (x);
14473 /* addend is NULL or some rtx if x is something+GOTOFF where
14474 something doesn't include the PIC register. */
14475 rtx addend = NULL_RTX;
14476 /* reg_addend is NULL or a multiple of some register. */
14477 rtx reg_addend = NULL_RTX;
14478 /* const_addend is NULL or a const_int. */
14479 rtx const_addend = NULL_RTX;
14480 /* This is the result, or NULL. */
14481 rtx result = NULL_RTX;
14483 x = orig_x;
14485 if (MEM_P (x))
14486 x = XEXP (x, 0);
14488 if (TARGET_64BIT)
14490 if (GET_CODE (x) == CONST
14491 && GET_CODE (XEXP (x, 0)) == PLUS
14492 && GET_MODE (XEXP (x, 0)) == Pmode
14493 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14494 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
14495 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
14497 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
14498 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
14499 if (MEM_P (orig_x))
14500 x = replace_equiv_address_nv (orig_x, x);
14501 return x;
14504 if (GET_CODE (x) == CONST
14505 && GET_CODE (XEXP (x, 0)) == UNSPEC
14506 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
14507 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
14508 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
14510 x = XVECEXP (XEXP (x, 0), 0, 0);
14511 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
14513 x = simplify_gen_subreg (GET_MODE (orig_x), x,
14514 GET_MODE (x), 0);
14515 if (x == NULL_RTX)
14516 return orig_x;
14518 return x;
14521 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
14522 return ix86_delegitimize_tls_address (orig_x);
14524 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
14525 and -mcmodel=medium -fpic. */
14528 if (GET_CODE (x) != PLUS
14529 || GET_CODE (XEXP (x, 1)) != CONST)
14530 return ix86_delegitimize_tls_address (orig_x);
14532 if (ix86_pic_register_p (XEXP (x, 0)))
14533 /* %ebx + GOT/GOTOFF */
14535 else if (GET_CODE (XEXP (x, 0)) == PLUS)
14537 /* %ebx + %reg * scale + GOT/GOTOFF */
14538 reg_addend = XEXP (x, 0);
14539 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
14540 reg_addend = XEXP (reg_addend, 1);
14541 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
14542 reg_addend = XEXP (reg_addend, 0);
14543 else
14545 reg_addend = NULL_RTX;
14546 addend = XEXP (x, 0);
14549 else
14550 addend = XEXP (x, 0);
14552 x = XEXP (XEXP (x, 1), 0);
14553 if (GET_CODE (x) == PLUS
14554 && CONST_INT_P (XEXP (x, 1)))
14556 const_addend = XEXP (x, 1);
14557 x = XEXP (x, 0);
14560 if (GET_CODE (x) == UNSPEC
14561 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
14562 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
14563 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
14564 && !MEM_P (orig_x) && !addend)))
14565 result = XVECEXP (x, 0, 0);
14567 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
14568 && !MEM_P (orig_x))
14569 result = XVECEXP (x, 0, 0);
14571 if (! result)
14572 return ix86_delegitimize_tls_address (orig_x);
14574 if (const_addend)
14575 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
14576 if (reg_addend)
14577 result = gen_rtx_PLUS (Pmode, reg_addend, result);
14578 if (addend)
14580 /* If the rest of original X doesn't involve the PIC register, add
14581 addend and subtract pic_offset_table_rtx. This can happen e.g.
14582 for code like:
14583 leal (%ebx, %ecx, 4), %ecx
14585 movl foo@GOTOFF(%ecx), %edx
14586 in which case we return (%ecx - %ebx) + foo. */
14587 if (pic_offset_table_rtx)
14588 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
14589 pic_offset_table_rtx),
14590 result);
14591 else
14592 return orig_x;
14594 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
14596 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
14597 if (result == NULL_RTX)
14598 return orig_x;
14600 return result;
14603 /* If X is a machine specific address (i.e. a symbol or label being
14604 referenced as a displacement from the GOT implemented using an
14605 UNSPEC), then return the base term. Otherwise return X. */
14608 ix86_find_base_term (rtx x)
14610 rtx term;
14612 if (TARGET_64BIT)
14614 if (GET_CODE (x) != CONST)
14615 return x;
14616 term = XEXP (x, 0);
14617 if (GET_CODE (term) == PLUS
14618 && (CONST_INT_P (XEXP (term, 1))
14619 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
14620 term = XEXP (term, 0);
14621 if (GET_CODE (term) != UNSPEC
14622 || (XINT (term, 1) != UNSPEC_GOTPCREL
14623 && XINT (term, 1) != UNSPEC_PCREL))
14624 return x;
14626 return XVECEXP (term, 0, 0);
14629 return ix86_delegitimize_address (x);
14632 static void
14633 put_condition_code (enum rtx_code code, enum machine_mode mode, bool reverse,
14634 bool fp, FILE *file)
14636 const char *suffix;
14638 if (mode == CCFPmode || mode == CCFPUmode)
14640 code = ix86_fp_compare_code_to_integer (code);
14641 mode = CCmode;
14643 if (reverse)
14644 code = reverse_condition (code);
14646 switch (code)
14648 case EQ:
14649 switch (mode)
14651 case CCAmode:
14652 suffix = "a";
14653 break;
14655 case CCCmode:
14656 suffix = "c";
14657 break;
14659 case CCOmode:
14660 suffix = "o";
14661 break;
14663 case CCSmode:
14664 suffix = "s";
14665 break;
14667 default:
14668 suffix = "e";
14670 break;
14671 case NE:
14672 switch (mode)
14674 case CCAmode:
14675 suffix = "na";
14676 break;
14678 case CCCmode:
14679 suffix = "nc";
14680 break;
14682 case CCOmode:
14683 suffix = "no";
14684 break;
14686 case CCSmode:
14687 suffix = "ns";
14688 break;
14690 default:
14691 suffix = "ne";
14693 break;
14694 case GT:
14695 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
14696 suffix = "g";
14697 break;
14698 case GTU:
14699 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
14700 Those same assemblers have the same but opposite lossage on cmov. */
14701 if (mode == CCmode)
14702 suffix = fp ? "nbe" : "a";
14703 else
14704 gcc_unreachable ();
14705 break;
14706 case LT:
14707 switch (mode)
14709 case CCNOmode:
14710 case CCGOCmode:
14711 suffix = "s";
14712 break;
14714 case CCmode:
14715 case CCGCmode:
14716 suffix = "l";
14717 break;
14719 default:
14720 gcc_unreachable ();
14722 break;
14723 case LTU:
14724 if (mode == CCmode)
14725 suffix = "b";
14726 else if (mode == CCCmode)
14727 suffix = "c";
14728 else
14729 gcc_unreachable ();
14730 break;
14731 case GE:
14732 switch (mode)
14734 case CCNOmode:
14735 case CCGOCmode:
14736 suffix = "ns";
14737 break;
14739 case CCmode:
14740 case CCGCmode:
14741 suffix = "ge";
14742 break;
14744 default:
14745 gcc_unreachable ();
14747 break;
14748 case GEU:
14749 if (mode == CCmode)
14750 suffix = fp ? "nb" : "ae";
14751 else if (mode == CCCmode)
14752 suffix = "nc";
14753 else
14754 gcc_unreachable ();
14755 break;
14756 case LE:
14757 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
14758 suffix = "le";
14759 break;
14760 case LEU:
14761 if (mode == CCmode)
14762 suffix = "be";
14763 else
14764 gcc_unreachable ();
14765 break;
14766 case UNORDERED:
14767 suffix = fp ? "u" : "p";
14768 break;
14769 case ORDERED:
14770 suffix = fp ? "nu" : "np";
14771 break;
14772 default:
14773 gcc_unreachable ();
14775 fputs (suffix, file);
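/* Example of the mapping above (illustrative only): put_condition_code
   (GT, CCGCmode, false, false, file) prints "g", so a setcc pattern ends up
   as "setg"; with REVERSE set, GT is first turned into LE and "le" is
   printed instead.  */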
14778 /* Print the name of register X to FILE based on its machine mode and number.
14779 If CODE is 'w', pretend the mode is HImode.
14780 If CODE is 'b', pretend the mode is QImode.
14781 If CODE is 'k', pretend the mode is SImode.
14782 If CODE is 'q', pretend the mode is DImode.
14783 If CODE is 'x', pretend the mode is V4SFmode.
14784 If CODE is 't', pretend the mode is V8SFmode.
14785 If CODE is 'g', pretend the mode is V16SFmode.
14786 If CODE is 'h', pretend the reg is the 'high' byte register.
14787 If CODE is 'y', print "st(0)" instead of "st" if the reg is a stack op.
14788 If CODE is 'd', duplicate the operand for AVX instruction.
14791 void
14792 print_reg (rtx x, int code, FILE *file)
14794 const char *reg;
14795 unsigned int regno;
14796 bool duplicated = code == 'd' && TARGET_AVX;
14798 if (ASSEMBLER_DIALECT == ASM_ATT)
14799 putc ('%', file);
14801 if (x == pc_rtx)
14803 gcc_assert (TARGET_64BIT);
14804 fputs ("rip", file);
14805 return;
14808 regno = true_regnum (x);
14809 gcc_assert (regno != ARG_POINTER_REGNUM
14810 && regno != FRAME_POINTER_REGNUM
14811 && regno != FLAGS_REG
14812 && regno != FPSR_REG
14813 && regno != FPCR_REG);
14815 if (code == 'w' || MMX_REG_P (x))
14816 code = 2;
14817 else if (code == 'b')
14818 code = 1;
14819 else if (code == 'k')
14820 code = 4;
14821 else if (code == 'q')
14822 code = 8;
14823 else if (code == 'y')
14824 code = 3;
14825 else if (code == 'h')
14826 code = 0;
14827 else if (code == 'x')
14828 code = 16;
14829 else if (code == 't')
14830 code = 32;
14831 else if (code == 'g')
14832 code = 64;
14833 else
14834 code = GET_MODE_SIZE (GET_MODE (x));
14836 /* Irritatingly, AMD extended registers use a different naming convention
14837 from the normal registers: "r%d[bwd]" */
14838 if (REX_INT_REGNO_P (regno))
14840 gcc_assert (TARGET_64BIT);
14841 putc ('r', file);
14842 fprint_ul (file, regno - FIRST_REX_INT_REG + 8);
14843 switch (code)
14845 case 0:
14846 error ("extended registers have no high halves");
14847 break;
14848 case 1:
14849 putc ('b', file);
14850 break;
14851 case 2:
14852 putc ('w', file);
14853 break;
14854 case 4:
14855 putc ('d', file);
14856 break;
14857 case 8:
14858 /* no suffix */
14859 break;
14860 default:
14861 error ("unsupported operand size for extended register");
14862 break;
14864 return;
14867 reg = NULL;
14868 switch (code)
14870 case 3:
14871 if (STACK_TOP_P (x))
14873 reg = "st(0)";
14874 break;
14876 /* FALLTHRU */
14877 case 8:
14878 case 4:
14879 case 12:
14880 if (! ANY_FP_REG_P (x) && ! ANY_BND_REG_P (x))
14881 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
14882 /* FALLTHRU */
14883 case 16:
14884 case 2:
14885 normal:
14886 reg = hi_reg_name[regno];
14887 break;
14888 case 1:
14889 if (regno >= ARRAY_SIZE (qi_reg_name))
14890 goto normal;
14891 reg = qi_reg_name[regno];
14892 break;
14893 case 0:
14894 if (regno >= ARRAY_SIZE (qi_high_reg_name))
14895 goto normal;
14896 reg = qi_high_reg_name[regno];
14897 break;
14898 case 32:
14899 if (SSE_REG_P (x))
14901 gcc_assert (!duplicated);
14902 putc ('y', file);
14903 fputs (hi_reg_name[regno] + 1, file);
14904 return;
14906 case 64:
14907 if (SSE_REG_P (x))
14909 gcc_assert (!duplicated);
14910 putc ('z', file);
14911 fputs (hi_reg_name[REGNO (x)] + 1, file);
14912 return;
14914 break;
14915 default:
14916 gcc_unreachable ();
14919 fputs (reg, file);
14920 if (duplicated)
14922 if (ASSEMBLER_DIALECT == ASM_ATT)
14923 fprintf (file, ", %%%s", reg);
14924 else
14925 fprintf (file, ", %s", reg);
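/* Examples (illustrative only): for (reg:DI ax), code 'k' prints "%eax" in
   AT&T dialect and "eax" in Intel dialect; for an SSE register, code 't'
   rewrites the "xmm" name into its "ymm" form, e.g. "%ymm0".  */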
14929 /* Locate some local-dynamic symbol still in use by this function
14930 so that we can print its name in some tls_local_dynamic_base
14931 pattern. */
14933 static int
14934 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
14936 rtx x = *px;
14938 if (GET_CODE (x) == SYMBOL_REF
14939 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
14941 cfun->machine->some_ld_name = XSTR (x, 0);
14942 return 1;
14945 return 0;
14948 static const char *
14949 get_some_local_dynamic_name (void)
14951 rtx insn;
14953 if (cfun->machine->some_ld_name)
14954 return cfun->machine->some_ld_name;
14956 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
14957 if (NONDEBUG_INSN_P (insn)
14958 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
14959 return cfun->machine->some_ld_name;
14961 return NULL;
14964 /* Meaning of CODE:
14965 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
14966 C -- print opcode suffix for set/cmov insn.
14967 c -- like C, but print reversed condition
14968 F,f -- likewise, but for floating-point.
14969 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
14970 otherwise nothing
14971 R -- print embedded rounding and sae.
14972 r -- print only sae.
14973 z -- print the opcode suffix for the size of the current operand.
14974 Z -- likewise, with special suffixes for x87 instructions.
14975 * -- print a star (in certain assembler syntax)
14976 A -- print an absolute memory reference.
14977 E -- print address with DImode register names if TARGET_64BIT.
14978 w -- print the operand as if it's a "word" (HImode) even if it isn't.
14979 s -- print a shift double count, followed by the assembler's argument
14980 delimiter.
14981 b -- print the QImode name of the register for the indicated operand.
14982 %b0 would print %al if operands[0] is reg 0.
14983 w -- likewise, print the HImode name of the register.
14984 k -- likewise, print the SImode name of the register.
14985 q -- likewise, print the DImode name of the register.
14986 x -- likewise, print the V4SFmode name of the register.
14987 t -- likewise, print the V8SFmode name of the register.
14988 g -- likewise, print the V16SFmode name of the register.
14989 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
14990 y -- print "st(0)" instead of "st" as a register.
14991 d -- print duplicated register operand for AVX instruction.
14992 D -- print condition for SSE cmp instruction.
14993 P -- if PIC, print an @PLT suffix.
14994 p -- print raw symbol name.
14995 X -- don't print any sort of PIC '@' suffix for a symbol.
14996 & -- print some in-use local-dynamic symbol name.
14997 H -- print a memory address offset by 8; used for sse high-parts
14998 Y -- print condition for XOP pcom* instruction.
14999 + -- print a branch hint as 'cs' or 'ds' prefix
15000 ; -- print a semicolon (after prefixes due to bug in older gas).
15001 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
15002 @ -- print a segment register of thread base pointer load
15003 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
15004 ! -- print MPX prefix for jxx/call/ret instructions if required.
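/* Usage example (illustrative, not part of the original source): an insn
   template such as "mov%z0\t{%1, %0|%0, %1}" uses 'z' to pick the b/w/l/q
   suffix from operand 0's mode, while the "{att|intel}" braces let final
   select the operand order for the active assembler dialect.  */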
15007 void
15008 ix86_print_operand (FILE *file, rtx x, int code)
15010 if (code)
15012 switch (code)
15014 case 'A':
15015 switch (ASSEMBLER_DIALECT)
15017 case ASM_ATT:
15018 putc ('*', file);
15019 break;
15021 case ASM_INTEL:
15022 /* Intel syntax. For absolute addresses, registers should not
15023 be surrounded by brackets. */
15024 if (!REG_P (x))
15026 putc ('[', file);
15027 ix86_print_operand (file, x, 0);
15028 putc (']', file);
15029 return;
15031 break;
15033 default:
15034 gcc_unreachable ();
15037 ix86_print_operand (file, x, 0);
15038 return;
15040 case 'E':
15041 /* Wrap address in an UNSPEC to declare special handling. */
15042 if (TARGET_64BIT)
15043 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
15045 output_address (x);
15046 return;
15048 case 'L':
15049 if (ASSEMBLER_DIALECT == ASM_ATT)
15050 putc ('l', file);
15051 return;
15053 case 'W':
15054 if (ASSEMBLER_DIALECT == ASM_ATT)
15055 putc ('w', file);
15056 return;
15058 case 'B':
15059 if (ASSEMBLER_DIALECT == ASM_ATT)
15060 putc ('b', file);
15061 return;
15063 case 'Q':
15064 if (ASSEMBLER_DIALECT == ASM_ATT)
15065 putc ('l', file);
15066 return;
15068 case 'S':
15069 if (ASSEMBLER_DIALECT == ASM_ATT)
15070 putc ('s', file);
15071 return;
15073 case 'T':
15074 if (ASSEMBLER_DIALECT == ASM_ATT)
15075 putc ('t', file);
15076 return;
15078 case 'O':
15079 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15080 if (ASSEMBLER_DIALECT != ASM_ATT)
15081 return;
15083 switch (GET_MODE_SIZE (GET_MODE (x)))
15085 case 2:
15086 putc ('w', file);
15087 break;
15089 case 4:
15090 putc ('l', file);
15091 break;
15093 case 8:
15094 putc ('q', file);
15095 break;
15097 default:
15098 output_operand_lossage
15099 ("invalid operand size for operand code 'O'");
15100 return;
15103 putc ('.', file);
15104 #endif
15105 return;
15107 case 'z':
15108 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
15110 /* Opcodes don't get size suffixes if using Intel opcodes. */
15111 if (ASSEMBLER_DIALECT == ASM_INTEL)
15112 return;
15114 switch (GET_MODE_SIZE (GET_MODE (x)))
15116 case 1:
15117 putc ('b', file);
15118 return;
15120 case 2:
15121 putc ('w', file);
15122 return;
15124 case 4:
15125 putc ('l', file);
15126 return;
15128 case 8:
15129 putc ('q', file);
15130 return;
15132 default:
15133 output_operand_lossage
15134 ("invalid operand size for operand code 'z'");
15135 return;
15139 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15140 warning
15141 (0, "non-integer operand used with operand code 'z'");
15142 /* FALLTHRU */
15144 case 'Z':
15145 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
15146 if (ASSEMBLER_DIALECT == ASM_INTEL)
15147 return;
15149 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
15151 switch (GET_MODE_SIZE (GET_MODE (x)))
15153 case 2:
15154 #ifdef HAVE_AS_IX86_FILDS
15155 putc ('s', file);
15156 #endif
15157 return;
15159 case 4:
15160 putc ('l', file);
15161 return;
15163 case 8:
15164 #ifdef HAVE_AS_IX86_FILDQ
15165 putc ('q', file);
15166 #else
15167 fputs ("ll", file);
15168 #endif
15169 return;
15171 default:
15172 break;
15175 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15177 /* 387 opcodes don't get size suffixes
15178 if the operands are registers. */
15179 if (STACK_REG_P (x))
15180 return;
15182 switch (GET_MODE_SIZE (GET_MODE (x)))
15184 case 4:
15185 putc ('s', file);
15186 return;
15188 case 8:
15189 putc ('l', file);
15190 return;
15192 case 12:
15193 case 16:
15194 putc ('t', file);
15195 return;
15197 default:
15198 break;
15201 else
15203 output_operand_lossage
15204 ("invalid operand type used with operand code 'Z'");
15205 return;
15208 output_operand_lossage
15209 ("invalid operand size for operand code 'Z'");
15210 return;
15212 case 'd':
15213 case 'b':
15214 case 'w':
15215 case 'k':
15216 case 'q':
15217 case 'h':
15218 case 't':
15219 case 'g':
15220 case 'y':
15221 case 'x':
15222 case 'X':
15223 case 'P':
15224 case 'p':
15225 break;
15227 case 's':
15228 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
15230 ix86_print_operand (file, x, 0);
15231 fputs (", ", file);
15233 return;
15235 case 'Y':
15236 switch (GET_CODE (x))
15238 case NE:
15239 fputs ("neq", file);
15240 break;
15241 case EQ:
15242 fputs ("eq", file);
15243 break;
15244 case GE:
15245 case GEU:
15246 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
15247 break;
15248 case GT:
15249 case GTU:
15250 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
15251 break;
15252 case LE:
15253 case LEU:
15254 fputs ("le", file);
15255 break;
15256 case LT:
15257 case LTU:
15258 fputs ("lt", file);
15259 break;
15260 case UNORDERED:
15261 fputs ("unord", file);
15262 break;
15263 case ORDERED:
15264 fputs ("ord", file);
15265 break;
15266 case UNEQ:
15267 fputs ("ueq", file);
15268 break;
15269 case UNGE:
15270 fputs ("nlt", file);
15271 break;
15272 case UNGT:
15273 fputs ("nle", file);
15274 break;
15275 case UNLE:
15276 fputs ("ule", file);
15277 break;
15278 case UNLT:
15279 fputs ("ult", file);
15280 break;
15281 case LTGT:
15282 fputs ("une", file);
15283 break;
15284 default:
15285 output_operand_lossage ("operand is not a condition code, "
15286 "invalid operand code 'Y'");
15287 return;
15289 return;
15291 case 'D':
15292 /* Little bit of braindamage here. The SSE compare instructions
15293 use completely different names for the comparisons than the
15294 fp conditional moves do. */
15295 switch (GET_CODE (x))
15297 case UNEQ:
15298 if (TARGET_AVX)
15300 fputs ("eq_us", file);
15301 break;
15303 case EQ:
15304 fputs ("eq", file);
15305 break;
15306 case UNLT:
15307 if (TARGET_AVX)
15309 fputs ("nge", file);
15310 break;
15312 case LT:
15313 fputs ("lt", file);
15314 break;
15315 case UNLE:
15316 if (TARGET_AVX)
15318 fputs ("ngt", file);
15319 break;
15321 case LE:
15322 fputs ("le", file);
15323 break;
15324 case UNORDERED:
15325 fputs ("unord", file);
15326 break;
15327 case LTGT:
15328 if (TARGET_AVX)
15330 fputs ("neq_oq", file);
15331 break;
15333 case NE:
15334 fputs ("neq", file);
15335 break;
15336 case GE:
15337 if (TARGET_AVX)
15339 fputs ("ge", file);
15340 break;
15342 case UNGE:
15343 fputs ("nlt", file);
15344 break;
15345 case GT:
15346 if (TARGET_AVX)
15348 fputs ("gt", file);
15349 break;
15351 case UNGT:
15352 fputs ("nle", file);
15353 break;
15354 case ORDERED:
15355 fputs ("ord", file);
15356 break;
15357 default:
15358 output_operand_lossage ("operand is not a condition code, "
15359 "invalid operand code 'D'");
15360 return;
15362 return;
15364 case 'F':
15365 case 'f':
15366 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15367 if (ASSEMBLER_DIALECT == ASM_ATT)
15368 putc ('.', file);
15369 #endif
15371 case 'C':
15372 case 'c':
15373 if (!COMPARISON_P (x))
15375 output_operand_lossage ("operand is not a condition code, "
15376 "invalid operand code '%c'", code);
15377 return;
15379 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
15380 code == 'c' || code == 'f',
15381 code == 'F' || code == 'f',
15382 file);
15383 return;
15385 case 'H':
15386 if (!offsettable_memref_p (x))
15388 output_operand_lossage ("operand is not an offsettable memory "
15389 "reference, invalid operand code 'H'");
15390 return;
15392 /* It doesn't actually matter what mode we use here, as we're
15393 only going to use this for printing. */
15394 x = adjust_address_nv (x, DImode, 8);
15395 /* Output 'qword ptr' for intel assembler dialect. */
15396 if (ASSEMBLER_DIALECT == ASM_INTEL)
15397 code = 'q';
15398 break;
15400 case 'K':
15401 gcc_assert (CONST_INT_P (x));
15403 if (INTVAL (x) & IX86_HLE_ACQUIRE)
15404 #ifdef HAVE_AS_IX86_HLE
15405 fputs ("xacquire ", file);
15406 #else
15407 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
15408 #endif
15409 else if (INTVAL (x) & IX86_HLE_RELEASE)
15410 #ifdef HAVE_AS_IX86_HLE
15411 fputs ("xrelease ", file);
15412 #else
15413 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
15414 #endif
15415 /* We do not want to print the value of the operand. */
15416 return;
15418 case 'N':
15419 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
15420 fputs ("{z}", file);
15421 return;
15423 case 'r':
15424 gcc_assert (CONST_INT_P (x));
15425 gcc_assert (INTVAL (x) == ROUND_SAE);
15427 if (ASSEMBLER_DIALECT == ASM_INTEL)
15428 fputs (", ", file);
15430 fputs ("{sae}", file);
15432 if (ASSEMBLER_DIALECT == ASM_ATT)
15433 fputs (", ", file);
15435 return;
15437 case 'R':
15438 gcc_assert (CONST_INT_P (x));
15440 if (ASSEMBLER_DIALECT == ASM_INTEL)
15441 fputs (", ", file);
15443 switch (INTVAL (x))
15445 case ROUND_NEAREST_INT | ROUND_SAE:
15446 fputs ("{rn-sae}", file);
15447 break;
15448 case ROUND_NEG_INF | ROUND_SAE:
15449 fputs ("{rd-sae}", file);
15450 break;
15451 case ROUND_POS_INF | ROUND_SAE:
15452 fputs ("{ru-sae}", file);
15453 break;
15454 case ROUND_ZERO | ROUND_SAE:
15455 fputs ("{rz-sae}", file);
15456 break;
15457 default:
15458 gcc_unreachable ();
15461 if (ASSEMBLER_DIALECT == ASM_ATT)
15462 fputs (", ", file);
15464 return;
15466 case '*':
15467 if (ASSEMBLER_DIALECT == ASM_ATT)
15468 putc ('*', file);
15469 return;
15471 case '&':
15473 const char *name = get_some_local_dynamic_name ();
15474 if (name == NULL)
15475 output_operand_lossage ("'%%&' used without any "
15476 "local dynamic TLS references");
15477 else
15478 assemble_name (file, name);
15479 return;
15482 case '+':
15484 rtx x;
15486 if (!optimize
15487 || optimize_function_for_size_p (cfun)
15488 || !TARGET_BRANCH_PREDICTION_HINTS)
15489 return;
15491 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
15492 if (x)
15494 int pred_val = XINT (x, 0);
15496 if (pred_val < REG_BR_PROB_BASE * 45 / 100
15497 || pred_val > REG_BR_PROB_BASE * 55 / 100)
15499 bool taken = pred_val > REG_BR_PROB_BASE / 2;
15500 bool cputaken
15501 = final_forward_branch_p (current_output_insn) == 0;
15503 /* Emit hints only in the case where the default branch prediction
15504 heuristics would fail. */
15505 if (taken != cputaken)
15507 /* We use 3e (DS) prefix for taken branches and
15508 2e (CS) prefix for not taken branches. */
15509 if (taken)
15510 fputs ("ds ; ", file);
15511 else
15512 fputs ("cs ; ", file);
15516 return;
15519 case ';':
15520 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
15521 putc (';', file);
15522 #endif
15523 return;
15525 case '@':
15526 if (ASSEMBLER_DIALECT == ASM_ATT)
15527 putc ('%', file);
15529 /* The kernel uses a different segment register for performance
15530 reasons; a system call would not have to trash the userspace
15531 segment register, which would be expensive. */
15532 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
15533 fputs ("fs", file);
15534 else
15535 fputs ("gs", file);
15536 return;
15538 case '~':
15539 putc (TARGET_AVX2 ? 'i' : 'f', file);
15540 return;
15542 case '^':
15543 if (TARGET_64BIT && Pmode != word_mode)
15544 fputs ("addr32 ", file);
15545 return;
15547 case '!':
15548 if (ix86_bnd_prefixed_insn_p (current_output_insn))
15549 fputs ("bnd ", file);
15550 return;
15552 default:
15553 output_operand_lossage ("invalid operand code '%c'", code);
15557 if (REG_P (x))
15558 print_reg (x, code, file);
15560 else if (MEM_P (x))
15562 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
15563 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
15564 && GET_MODE (x) != BLKmode)
15566 const char * size;
15567 switch (GET_MODE_SIZE (GET_MODE (x)))
15569 case 1: size = "BYTE"; break;
15570 case 2: size = "WORD"; break;
15571 case 4: size = "DWORD"; break;
15572 case 8: size = "QWORD"; break;
15573 case 12: size = "TBYTE"; break;
15574 case 16:
15575 if (GET_MODE (x) == XFmode)
15576 size = "TBYTE";
15577 else
15578 size = "XMMWORD";
15579 break;
15580 case 32: size = "YMMWORD"; break;
15581 case 64: size = "ZMMWORD"; break;
15582 default:
15583 gcc_unreachable ();
15586 /* Check for explicit size override (codes 'b', 'w', 'k',
15587 'q' and 'x') */
15588 if (code == 'b')
15589 size = "BYTE";
15590 else if (code == 'w')
15591 size = "WORD";
15592 else if (code == 'k')
15593 size = "DWORD";
15594 else if (code == 'q')
15595 size = "QWORD";
15596 else if (code == 'x')
15597 size = "XMMWORD";
15599 fputs (size, file);
15600 fputs (" PTR ", file);
15603 x = XEXP (x, 0);
15604 /* Avoid (%rip) for call operands. */
15605 if (CONSTANT_ADDRESS_P (x) && code == 'P'
15606 && !CONST_INT_P (x))
15607 output_addr_const (file, x);
15608 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
15609 output_operand_lossage ("invalid constraints for operand");
15610 else
15611 output_address (x);
15614 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
15616 REAL_VALUE_TYPE r;
15617 long l;
15619 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15620 REAL_VALUE_TO_TARGET_SINGLE (r, l);
15622 if (ASSEMBLER_DIALECT == ASM_ATT)
15623 putc ('$', file);
15624 /* Sign extend 32bit SFmode immediate to 8 bytes. */
15625 if (code == 'q')
15626 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
15627 (unsigned long long) (int) l);
15628 else
15629 fprintf (file, "0x%08x", (unsigned int) l);
15632 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
15634 REAL_VALUE_TYPE r;
15635 long l[2];
15637 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15638 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
15640 if (ASSEMBLER_DIALECT == ASM_ATT)
15641 putc ('$', file);
15642 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
15645 /* These float cases don't actually occur as immediate operands. */
15646 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
15648 char dstr[30];
15650 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
15651 fputs (dstr, file);
15654 else
15656 /* We have patterns that allow zero sets of memory, for instance.
15657 In 64-bit mode, we should probably support all 8-byte vectors,
15658 since we can in fact encode that into an immediate. */
15659 if (GET_CODE (x) == CONST_VECTOR)
15661 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
15662 x = const0_rtx;
15665 if (code != 'P' && code != 'p')
15667 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
15669 if (ASSEMBLER_DIALECT == ASM_ATT)
15670 putc ('$', file);
15672 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
15673 || GET_CODE (x) == LABEL_REF)
15675 if (ASSEMBLER_DIALECT == ASM_ATT)
15676 putc ('$', file);
15677 else
15678 fputs ("OFFSET FLAT:", file);
15681 if (CONST_INT_P (x))
15682 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
15683 else if (flag_pic || MACHOPIC_INDIRECT)
15684 output_pic_addr_const (file, x, code);
15685 else
15686 output_addr_const (file, x);
15690 static bool
15691 ix86_print_operand_punct_valid_p (unsigned char code)
15693 return (code == '@' || code == '*' || code == '+' || code == '&'
15694 || code == ';' || code == '~' || code == '^' || code == '!');
15697 /* Print a memory operand whose address is ADDR. */
15699 static void
15700 ix86_print_operand_address (FILE *file, rtx addr)
15702 struct ix86_address parts;
15703 rtx base, index, disp;
15704 int scale;
15705 int ok;
15706 bool vsib = false;
15707 int code = 0;
15709 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
15711 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15712 gcc_assert (parts.index == NULL_RTX);
15713 parts.index = XVECEXP (addr, 0, 1);
15714 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
15715 addr = XVECEXP (addr, 0, 0);
15716 vsib = true;
15718 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
15720 gcc_assert (TARGET_64BIT);
15721 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15722 code = 'q';
15724 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDMK_ADDR)
15726 ok = ix86_decompose_address (XVECEXP (addr, 0, 1), &parts);
15727 gcc_assert (parts.base == NULL_RTX || parts.index == NULL_RTX);
15728 if (parts.base != NULL_RTX)
15730 parts.index = parts.base;
15731 parts.scale = 1;
15733 parts.base = XVECEXP (addr, 0, 0);
15734 addr = XVECEXP (addr, 0, 0);
15736 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDLDX_ADDR)
15738 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15739 gcc_assert (parts.index == NULL_RTX);
15740 parts.index = XVECEXP (addr, 0, 1);
15741 addr = XVECEXP (addr, 0, 0);
15743 else
15744 ok = ix86_decompose_address (addr, &parts);
15746 gcc_assert (ok);
15748 base = parts.base;
15749 index = parts.index;
15750 disp = parts.disp;
15751 scale = parts.scale;
15753 switch (parts.seg)
15755 case SEG_DEFAULT:
15756 break;
15757 case SEG_FS:
15758 case SEG_GS:
15759 if (ASSEMBLER_DIALECT == ASM_ATT)
15760 putc ('%', file);
15761 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
15762 break;
15763 default:
15764 gcc_unreachable ();
15767 /* Use one byte shorter RIP relative addressing for 64bit mode. */
15768 if (TARGET_64BIT && !base && !index)
15770 rtx symbol = disp;
15772 if (GET_CODE (disp) == CONST
15773 && GET_CODE (XEXP (disp, 0)) == PLUS
15774 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
15775 symbol = XEXP (XEXP (disp, 0), 0);
15777 if (GET_CODE (symbol) == LABEL_REF
15778 || (GET_CODE (symbol) == SYMBOL_REF
15779 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
15780 base = pc_rtx;
15782 if (!base && !index)
15784 /* Displacement only requires special attention. */
15786 if (CONST_INT_P (disp))
15788 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
15789 fputs ("ds:", file);
15790 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
15792 else if (flag_pic)
15793 output_pic_addr_const (file, disp, 0);
15794 else
15795 output_addr_const (file, disp);
15797 else
15799 /* Print SImode register names to force addr32 prefix. */
15800 if (SImode_address_operand (addr, VOIDmode))
15802 #ifdef ENABLE_CHECKING
15803 gcc_assert (TARGET_64BIT);
15804 switch (GET_CODE (addr))
15806 case SUBREG:
15807 gcc_assert (GET_MODE (addr) == SImode);
15808 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
15809 break;
15810 case ZERO_EXTEND:
15811 case AND:
15812 gcc_assert (GET_MODE (addr) == DImode);
15813 break;
15814 default:
15815 gcc_unreachable ();
15817 #endif
15818 gcc_assert (!code);
15819 code = 'k';
15821 else if (code == 0
15822 && TARGET_X32
15823 && disp
15824 && CONST_INT_P (disp)
15825 && INTVAL (disp) < -16*1024*1024)
15827 /* X32 runs in 64-bit mode, where displacement, DISP, in
15828 address DISP(%r64), is encoded as 32-bit immediate sign-
15829 extended from 32-bit to 64-bit. For -0x40000300(%r64),
15830 address is %r64 + 0xffffffffbffffd00. When %r64 <
15831 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
15832 which is invalid for x32. The correct address is %r64
15833 - 0x40000300 == 0xf7ffdd64. To properly encode
15834 -0x40000300(%r64) for x32, we zero-extend negative
15835 displacement by forcing addr32 prefix which truncates
15836 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
15837 zero-extend all negative displacements, including -1(%rsp).
15838 However, for small negative displacements, sign-extension
15839 won't cause overflow. We only zero-extend negative
15840 displacements if they < -16*1024*1024, which is also used
15841 to check legitimate address displacements for PIC. */
15842 code = 'k';
15845 if (ASSEMBLER_DIALECT == ASM_ATT)
15847 if (disp)
15849 if (flag_pic)
15850 output_pic_addr_const (file, disp, 0);
15851 else if (GET_CODE (disp) == LABEL_REF)
15852 output_asm_label (disp);
15853 else
15854 output_addr_const (file, disp);
15857 putc ('(', file);
15858 if (base)
15859 print_reg (base, code, file);
15860 if (index)
15862 putc (',', file);
15863 print_reg (index, vsib ? 0 : code, file);
15864 if (scale != 1 || vsib)
15865 fprintf (file, ",%d", scale);
15867 putc (')', file);
15869 else
15871 rtx offset = NULL_RTX;
15873 if (disp)
15875 /* Pull out the offset of a symbol; print any symbol itself. */
15876 if (GET_CODE (disp) == CONST
15877 && GET_CODE (XEXP (disp, 0)) == PLUS
15878 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
15880 offset = XEXP (XEXP (disp, 0), 1);
15881 disp = gen_rtx_CONST (VOIDmode,
15882 XEXP (XEXP (disp, 0), 0));
15885 if (flag_pic)
15886 output_pic_addr_const (file, disp, 0);
15887 else if (GET_CODE (disp) == LABEL_REF)
15888 output_asm_label (disp);
15889 else if (CONST_INT_P (disp))
15890 offset = disp;
15891 else
15892 output_addr_const (file, disp);
15895 putc ('[', file);
15896 if (base)
15898 print_reg (base, code, file);
15899 if (offset)
15901 if (INTVAL (offset) >= 0)
15902 putc ('+', file);
15903 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
15906 else if (offset)
15907 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
15908 else
15909 putc ('0', file);
15911 if (index)
15913 putc ('+', file);
15914 print_reg (index, vsib ? 0 : code, file);
15915 if (scale != 1 || vsib)
15916 fprintf (file, "*%d", scale);
15918 putc (']', file);
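/* Example output of the routine above (illustrative only): for base %ebx,
   index %ecx, scale 4 and displacement 12 it prints "12(%ebx,%ecx,4)" in
   AT&T syntax and "[ebx+12+ecx*4]" in Intel syntax.  */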
15923 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
15925 static bool
15926 i386_asm_output_addr_const_extra (FILE *file, rtx x)
15928 rtx op;
15930 if (GET_CODE (x) != UNSPEC)
15931 return false;
15933 op = XVECEXP (x, 0, 0);
15934 switch (XINT (x, 1))
15936 case UNSPEC_GOTTPOFF:
15937 output_addr_const (file, op);
15938 /* FIXME: This might be @TPOFF in Sun ld. */
15939 fputs ("@gottpoff", file);
15940 break;
15941 case UNSPEC_TPOFF:
15942 output_addr_const (file, op);
15943 fputs ("@tpoff", file);
15944 break;
15945 case UNSPEC_NTPOFF:
15946 output_addr_const (file, op);
15947 if (TARGET_64BIT)
15948 fputs ("@tpoff", file);
15949 else
15950 fputs ("@ntpoff", file);
15951 break;
15952 case UNSPEC_DTPOFF:
15953 output_addr_const (file, op);
15954 fputs ("@dtpoff", file);
15955 break;
15956 case UNSPEC_GOTNTPOFF:
15957 output_addr_const (file, op);
15958 if (TARGET_64BIT)
15959 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
15960 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
15961 else
15962 fputs ("@gotntpoff", file);
15963 break;
15964 case UNSPEC_INDNTPOFF:
15965 output_addr_const (file, op);
15966 fputs ("@indntpoff", file);
15967 break;
15968 #if TARGET_MACHO
15969 case UNSPEC_MACHOPIC_OFFSET:
15970 output_addr_const (file, op);
15971 putc ('-', file);
15972 machopic_output_function_base_name (file);
15973 break;
15974 #endif
15976 case UNSPEC_STACK_CHECK:
15978 int offset;
15980 gcc_assert (flag_split_stack);
15982 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
15983 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
15984 #else
15985 gcc_unreachable ();
15986 #endif
15988 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
15990 break;
15992 default:
15993 return false;
15996 return true;
15999 /* Split one or more double-mode RTL references into pairs of half-mode
16000 references. The RTL can be REG, offsettable MEM, integer constant, or
16001 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
16002 split and "num" is its length. lo_half and hi_half are output arrays
16003 that parallel "operands". */
16005 void
16006 split_double_mode (enum machine_mode mode, rtx operands[],
16007 int num, rtx lo_half[], rtx hi_half[])
16009 enum machine_mode half_mode;
16010 unsigned int byte;
16012 switch (mode)
16014 case TImode:
16015 half_mode = DImode;
16016 break;
16017 case DImode:
16018 half_mode = SImode;
16019 break;
16020 default:
16021 gcc_unreachable ();
16024 byte = GET_MODE_SIZE (half_mode);
16026 while (num--)
16028 rtx op = operands[num];
16030 /* simplify_subreg refuses to split volatile memory addresses,
16031 but we still have to handle them. */
16032 if (MEM_P (op))
16034 lo_half[num] = adjust_address (op, half_mode, 0);
16035 hi_half[num] = adjust_address (op, half_mode, byte);
16037 else
16039 lo_half[num] = simplify_gen_subreg (half_mode, op,
16040 GET_MODE (op) == VOIDmode
16041 ? mode : GET_MODE (op), 0);
16042 hi_half[num] = simplify_gen_subreg (half_mode, op,
16043 GET_MODE (op) == VOIDmode
16044 ? mode : GET_MODE (op), byte);
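/* Example (illustrative only): splitting the DImode constant
   (const_int 0x100000002) yields lo_half (const_int 0x2) and hi_half
   (const_int 0x1), while a DImode MEM is split into two SImode MEMs at
   offsets 0 and 4.  */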
16049 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
16050 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
16051 is the expression of the binary operation. The output may either be
16052 emitted here, or returned to the caller, like all output_* functions.
16054 There is no guarantee that the operands are the same mode, as they
16055 might be within FLOAT or FLOAT_EXTEND expressions. */
16057 #ifndef SYSV386_COMPAT
16058 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
16059 wants to fix the assemblers because that causes incompatibility
16060 with gcc. No-one wants to fix gcc because that causes
16061 incompatibility with assemblers... You can use the option of
16062 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
16063 #define SYSV386_COMPAT 1
16064 #endif
16066 const char *
16067 output_387_binary_op (rtx insn, rtx *operands)
16069 static char buf[40];
16070 const char *p;
16071 const char *ssep;
16072 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
16074 #ifdef ENABLE_CHECKING
16075 /* Even if we do not want to check the inputs, this documents the input
16076 constraints, which helps in understanding the following code. */
16077 if (STACK_REG_P (operands[0])
16078 && ((REG_P (operands[1])
16079 && REGNO (operands[0]) == REGNO (operands[1])
16080 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
16081 || (REG_P (operands[2])
16082 && REGNO (operands[0]) == REGNO (operands[2])
16083 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
16084 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
16085 ; /* ok */
16086 else
16087 gcc_assert (is_sse);
16088 #endif
16090 switch (GET_CODE (operands[3]))
16092 case PLUS:
16093 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16094 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16095 p = "fiadd";
16096 else
16097 p = "fadd";
16098 ssep = "vadd";
16099 break;
16101 case MINUS:
16102 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16103 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16104 p = "fisub";
16105 else
16106 p = "fsub";
16107 ssep = "vsub";
16108 break;
16110 case MULT:
16111 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16112 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16113 p = "fimul";
16114 else
16115 p = "fmul";
16116 ssep = "vmul";
16117 break;
16119 case DIV:
16120 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16121 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16122 p = "fidiv";
16123 else
16124 p = "fdiv";
16125 ssep = "vdiv";
16126 break;
16128 default:
16129 gcc_unreachable ();
16132 if (is_sse)
16134 if (TARGET_AVX)
16136 strcpy (buf, ssep);
16137 if (GET_MODE (operands[0]) == SFmode)
16138 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
16139 else
16140 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
16142 else
16144 strcpy (buf, ssep + 1);
16145 if (GET_MODE (operands[0]) == SFmode)
16146 strcat (buf, "ss\t{%2, %0|%0, %2}");
16147 else
16148 strcat (buf, "sd\t{%2, %0|%0, %2}");
16150 return buf;
16152 strcpy (buf, p);
16154 switch (GET_CODE (operands[3]))
16156 case MULT:
16157 case PLUS:
16158 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
16160 rtx temp = operands[2];
16161 operands[2] = operands[1];
16162 operands[1] = temp;
16165 /* We know operands[0] == operands[1]. */
16167 if (MEM_P (operands[2]))
16169 p = "%Z2\t%2";
16170 break;
16173 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
16175 if (STACK_TOP_P (operands[0]))
16176 /* How is it that we are storing to a dead operand[2]?
16177 Well, presumably operands[1] is dead too. We can't
16178 store the result to st(0) as st(0) gets popped on this
16179 instruction. Instead store to operands[2] (which I
16180 think has to be st(1)). st(1) will be popped later.
16181 gcc <= 2.8.1 didn't have this check and generated
16182 assembly code that the Unixware assembler rejected. */
16183 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
16184 else
16185 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16186 break;
16189 if (STACK_TOP_P (operands[0]))
16190 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16191 else
16192 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16193 break;
16195 case MINUS:
16196 case DIV:
16197 if (MEM_P (operands[1]))
16199 p = "r%Z1\t%1";
16200 break;
16203 if (MEM_P (operands[2]))
16205 p = "%Z2\t%2";
16206 break;
16209 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
16211 #if SYSV386_COMPAT
16212 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
16213 derived assemblers, confusingly reverse the direction of
16214 the operation for fsub{r} and fdiv{r} when the
16215 destination register is not st(0). The Intel assembler
16216 doesn't have this brain damage. Read !SYSV386_COMPAT to
16217 figure out what the hardware really does. */
16218 if (STACK_TOP_P (operands[0]))
16219 p = "{p\t%0, %2|rp\t%2, %0}";
16220 else
16221 p = "{rp\t%2, %0|p\t%0, %2}";
16222 #else
16223 if (STACK_TOP_P (operands[0]))
16224 /* As above for fmul/fadd, we can't store to st(0). */
16225 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
16226 else
16227 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16228 #endif
16229 break;
16232 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
16234 #if SYSV386_COMPAT
16235 if (STACK_TOP_P (operands[0]))
16236 p = "{rp\t%0, %1|p\t%1, %0}";
16237 else
16238 p = "{p\t%1, %0|rp\t%0, %1}";
16239 #else
16240 if (STACK_TOP_P (operands[0]))
16241 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
16242 else
16243 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
16244 #endif
16245 break;
16248 if (STACK_TOP_P (operands[0]))
16250 if (STACK_TOP_P (operands[1]))
16251 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16252 else
16253 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
16254 break;
16256 else if (STACK_TOP_P (operands[1]))
16258 #if SYSV386_COMPAT
16259 p = "{\t%1, %0|r\t%0, %1}";
16260 #else
16261 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
16262 #endif
16264 else
16266 #if SYSV386_COMPAT
16267 p = "{r\t%2, %0|\t%0, %2}";
16268 #else
16269 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16270 #endif
16272 break;
16274 default:
16275 gcc_unreachable ();
16278 strcat (buf, p);
16279 return buf;
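/* Example results of the routine above (illustrative only): an SFmode SSE
   addition returns "addss\t{%2, %0|%0, %2}" without AVX and
   "vaddss\t{%2, %1, %0|%0, %1, %2}" with AVX; the x87 path instead builds
   an "fadd"/"faddp"-style template from the operand analysis above.  */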
16282 /* Check if a 256bit AVX register is referenced inside of EXP. */
16284 static int
16285 ix86_check_avx256_register (rtx *pexp, void *data ATTRIBUTE_UNUSED)
16287 rtx exp = *pexp;
16289 if (GET_CODE (exp) == SUBREG)
16290 exp = SUBREG_REG (exp);
16292 if (REG_P (exp)
16293 && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)))
16294 return 1;
16296 return 0;
16299 /* Return needed mode for entity in optimize_mode_switching pass. */
16301 static int
16302 ix86_avx_u128_mode_needed (rtx insn)
16304 if (CALL_P (insn))
16306 rtx link;
16308 /* Needed mode is set to AVX_U128_CLEAN if there are
16309 no 256bit modes used in function arguments. */
16310 for (link = CALL_INSN_FUNCTION_USAGE (insn);
16311 link;
16312 link = XEXP (link, 1))
16314 if (GET_CODE (XEXP (link, 0)) == USE)
16316 rtx arg = XEXP (XEXP (link, 0), 0);
16318 if (ix86_check_avx256_register (&arg, NULL))
16319 return AVX_U128_DIRTY;
16323 return AVX_U128_CLEAN;
16326 /* Require DIRTY mode if a 256bit AVX register is referenced. Hardware
16327 changes state only when a 256bit register is written to, but we need
16328 to prevent the compiler from moving the optimal insertion point above
16329 an eventual read from a 256bit register. */
16330 if (for_each_rtx (&PATTERN (insn), ix86_check_avx256_register, NULL))
16331 return AVX_U128_DIRTY;
16333 return AVX_U128_ANY;
16336 /* Return mode that i387 must be switched into
16337 prior to the execution of insn. */
16339 static int
16340 ix86_i387_mode_needed (int entity, rtx insn)
16342 enum attr_i387_cw mode;
16344 /* The mode UNINITIALIZED is used to store the control word after a
16345 function call or ASM pattern. The mode ANY specifies that the function
16346 has no requirements on the control word and makes no changes in the
16347 bits we are interested in. */
16349 if (CALL_P (insn)
16350 || (NONJUMP_INSN_P (insn)
16351 && (asm_noperands (PATTERN (insn)) >= 0
16352 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
16353 return I387_CW_UNINITIALIZED;
16355 if (recog_memoized (insn) < 0)
16356 return I387_CW_ANY;
16358 mode = get_attr_i387_cw (insn);
16360 switch (entity)
16362 case I387_TRUNC:
16363 if (mode == I387_CW_TRUNC)
16364 return mode;
16365 break;
16367 case I387_FLOOR:
16368 if (mode == I387_CW_FLOOR)
16369 return mode;
16370 break;
16372 case I387_CEIL:
16373 if (mode == I387_CW_CEIL)
16374 return mode;
16375 break;
16377 case I387_MASK_PM:
16378 if (mode == I387_CW_MASK_PM)
16379 return mode;
16380 break;
16382 default:
16383 gcc_unreachable ();
16386 return I387_CW_ANY;
16389 /* Return mode that entity must be switched into
16390 prior to the execution of insn. */
16393 ix86_mode_needed (int entity, rtx insn)
16395 switch (entity)
16397 case AVX_U128:
16398 return ix86_avx_u128_mode_needed (insn);
16399 case I387_TRUNC:
16400 case I387_FLOOR:
16401 case I387_CEIL:
16402 case I387_MASK_PM:
16403 return ix86_i387_mode_needed (entity, insn);
16404 default:
16405 gcc_unreachable ();
16407 return 0;
16410 /* Check if a 256bit AVX register is referenced in stores. */
16412 static void
16413 ix86_check_avx256_stores (rtx dest, const_rtx set ATTRIBUTE_UNUSED, void *data)
16415 if (ix86_check_avx256_register (&dest, NULL))
16417 bool *used = (bool *) data;
16418 *used = true;
16422 /* Calculate mode of upper 128bit AVX registers after the insn. */
16424 static int
16425 ix86_avx_u128_mode_after (int mode, rtx insn)
16427 rtx pat = PATTERN (insn);
16429 if (vzeroupper_operation (pat, VOIDmode)
16430 || vzeroall_operation (pat, VOIDmode))
16431 return AVX_U128_CLEAN;
16433 /* We know that the state is clean after a CALL insn if there are no
16434 256bit registers used in the function return register. */
16435 if (CALL_P (insn))
16437 bool avx_reg256_found = false;
16438 note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found);
16440 return avx_reg256_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
16443 /* Otherwise, return the current mode. Remember that if the insn
16444 references AVX 256bit registers, the mode was already changed
16445 to DIRTY from MODE_NEEDED. */
16446 return mode;
16449 /* Return the mode that an insn results in. */
16452 ix86_mode_after (int entity, int mode, rtx insn)
16454 switch (entity)
16456 case AVX_U128:
16457 return ix86_avx_u128_mode_after (mode, insn);
16458 case I387_TRUNC:
16459 case I387_FLOOR:
16460 case I387_CEIL:
16461 case I387_MASK_PM:
16462 return mode;
16463 default:
16464 gcc_unreachable ();
16468 static int
16469 ix86_avx_u128_mode_entry (void)
16471 tree arg;
16473 /* Entry mode is set to AVX_U128_DIRTY if there are
16474 256bit modes used in function arguments. */
16475 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
16476 arg = TREE_CHAIN (arg))
16478 rtx incoming = DECL_INCOMING_RTL (arg);
16480 if (incoming && ix86_check_avx256_register (&incoming, NULL))
16481 return AVX_U128_DIRTY;
16484 return AVX_U128_CLEAN;
16487 /* Return a mode that ENTITY is assumed to be
16488 switched to at function entry. */
16491 ix86_mode_entry (int entity)
16493 switch (entity)
16495 case AVX_U128:
16496 return ix86_avx_u128_mode_entry ();
16497 case I387_TRUNC:
16498 case I387_FLOOR:
16499 case I387_CEIL:
16500 case I387_MASK_PM:
16501 return I387_CW_ANY;
16502 default:
16503 gcc_unreachable ();
16507 static int
16508 ix86_avx_u128_mode_exit (void)
16510 rtx reg = crtl->return_rtx;
16512 /* Exit mode is set to AVX_U128_DIRTY if there are
16513 256bit modes used in the function return register. */
16514 if (reg && ix86_check_avx256_register (&reg, NULL))
16515 return AVX_U128_DIRTY;
16517 return AVX_U128_CLEAN;
16520 /* Return a mode that ENTITY is assumed to be
16521 switched to at function exit. */
16524 ix86_mode_exit (int entity)
16526 switch (entity)
16528 case AVX_U128:
16529 return ix86_avx_u128_mode_exit ();
16530 case I387_TRUNC:
16531 case I387_FLOOR:
16532 case I387_CEIL:
16533 case I387_MASK_PM:
16534 return I387_CW_ANY;
16535 default:
16536 gcc_unreachable ();
16540 /* Output code to initialize control word copies used by trunc?f?i and
16541 rounding patterns. The current control word is read into a scratch
16542 register, adjusted for MODE and saved to the stack slot for that mode. */
16544 static void
16545 emit_i387_cw_initialization (int mode)
16547 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
16548 rtx new_mode;
16550 enum ix86_stack_slot slot;
16552 rtx reg = gen_reg_rtx (HImode);
16554 emit_insn (gen_x86_fnstcw_1 (stored_mode));
16555 emit_move_insn (reg, copy_rtx (stored_mode));
16557 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
16558 || optimize_insn_for_size_p ())
16560 switch (mode)
16562 case I387_CW_TRUNC:
16563 /* round toward zero (truncate) */
16564 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
16565 slot = SLOT_CW_TRUNC;
16566 break;
16568 case I387_CW_FLOOR:
16569 /* round down toward -oo */
16570 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16571 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
16572 slot = SLOT_CW_FLOOR;
16573 break;
16575 case I387_CW_CEIL:
16576 /* round up toward +oo */
16577 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16578 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
16579 slot = SLOT_CW_CEIL;
16580 break;
16582 case I387_CW_MASK_PM:
16583 /* mask precision exception for nearbyint() */
16584 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16585 slot = SLOT_CW_MASK_PM;
16586 break;
16588 default:
16589 gcc_unreachable ();
16592 else
16594 switch (mode)
16596 case I387_CW_TRUNC:
16597 /* round toward zero (truncate) */
16598 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
16599 slot = SLOT_CW_TRUNC;
16600 break;
16602 case I387_CW_FLOOR:
16603 /* round down toward -oo */
16604 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
16605 slot = SLOT_CW_FLOOR;
16606 break;
16608 case I387_CW_CEIL:
16609 /* round up toward +oo */
16610 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
16611 slot = SLOT_CW_CEIL;
16612 break;
16614 case I387_CW_MASK_PM:
16615 /* mask precision exception for nearbyint() */
16616 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16617 slot = SLOT_CW_MASK_PM;
16618 break;
16620 default:
16621 gcc_unreachable ();
16625 gcc_assert (slot < MAX_386_STACK_LOCALS);
16627 new_mode = assign_386_stack_local (HImode, slot);
16628 emit_move_insn (new_mode, reg);
16631 /* Emit vzeroupper. */
16633 void
16634 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
16636 int i;
16638 /* Cancel automatic vzeroupper insertion if there are
16639 live call-saved SSE registers at the insertion point. */
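/* vzeroupper zeroes the upper 128 bits of every YMM register, so it
   must not be emitted while a call-saved SSE register is still live.  */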
16641 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
16642 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16643 return;
16645 if (TARGET_64BIT)
16646 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
16647 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16648 return;
16650 emit_insn (gen_avx_vzeroupper ());
16653 /* Generate one or more insns to set ENTITY to MODE. */
16655 void
16656 ix86_emit_mode_set (int entity, int mode, HARD_REG_SET regs_live)
16658 switch (entity)
16660 case AVX_U128:
16661 if (mode == AVX_U128_CLEAN)
16662 ix86_avx_emit_vzeroupper (regs_live);
16663 break;
16664 case I387_TRUNC:
16665 case I387_FLOOR:
16666 case I387_CEIL:
16667 case I387_MASK_PM:
16668 if (mode != I387_CW_ANY
16669 && mode != I387_CW_UNINITIALIZED)
16670 emit_i387_cw_initialization (mode);
16671 break;
16672 default:
16673 gcc_unreachable ();
16677 /* Output code for INSN to convert a float to a signed int. OPERANDS
16678 are the insn operands. The output may be [HSD]Imode and the input
16679 operand may be [SDX]Fmode. */
16681 const char *
16682 output_fix_trunc (rtx insn, rtx *operands, bool fisttp)
16684 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
16685 int dimode_p = GET_MODE (operands[0]) == DImode;
16686 int round_mode = get_attr_i387_cw (insn);
16688 /* Jump through a hoop or two for DImode, since the hardware has no
16689 non-popping instruction. We used to do this a different way, but
16690 that was somewhat fragile and broke with post-reload splitters. */
16691 if ((dimode_p || fisttp) && !stack_top_dies)
16692 output_asm_insn ("fld\t%y1", operands);
16694 gcc_assert (STACK_TOP_P (operands[1]));
16695 gcc_assert (MEM_P (operands[0]));
16696 gcc_assert (GET_MODE (operands[1]) != TFmode);
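/* fisttp (SSE3) always truncates regardless of the rounding control,
   so no control word switch is needed; otherwise the control word is
   temporarily loaded from operand 3 and restored from operand 2
   around the store.  */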
16698 if (fisttp)
16699 output_asm_insn ("fisttp%Z0\t%0", operands);
16700 else
16702 if (round_mode != I387_CW_ANY)
16703 output_asm_insn ("fldcw\t%3", operands);
16704 if (stack_top_dies || dimode_p)
16705 output_asm_insn ("fistp%Z0\t%0", operands);
16706 else
16707 output_asm_insn ("fist%Z0\t%0", operands);
16708 if (round_mode != I387_CW_ANY)
16709 output_asm_insn ("fldcw\t%2", operands);
16712 return "";
16715 /* Output code for x87 ffreep insn. The OPNO argument, which may only
16716 have the values zero or one, indicates the ffreep insn's operand
16717 from the OPERANDS array. */
16719 static const char *
16720 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
16722 if (TARGET_USE_FFREEP)
16723 #ifdef HAVE_AS_IX86_FFREEP
16724 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
16725 #else
16727 static char retval[32];
16728 int regno = REGNO (operands[opno]);
16730 gcc_assert (STACK_REGNO_P (regno));
16732 regno -= FIRST_STACK_REG;
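/* Emit the opcode by hand: ffreep %st(N) encodes as the two bytes
   DF C0+N, which the little-endian ASM_SHORT word below produces.  */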
16734 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
16735 return retval;
16737 #endif
16739 return opno ? "fstp\t%y1" : "fstp\t%y0";
16743 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
16744 should be used. UNORDERED_P is true when fucom should be used. */
16746 const char *
16747 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
16749 int stack_top_dies;
16750 rtx cmp_op0, cmp_op1;
16751 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
16753 if (eflags_p)
16755 cmp_op0 = operands[0];
16756 cmp_op1 = operands[1];
16758 else
16760 cmp_op0 = operands[1];
16761 cmp_op1 = operands[2];
16764 if (is_sse)
16766 if (GET_MODE (operands[0]) == SFmode)
16767 if (unordered_p)
16768 return "%vucomiss\t{%1, %0|%0, %1}";
16769 else
16770 return "%vcomiss\t{%1, %0|%0, %1}";
16771 else
16772 if (unordered_p)
16773 return "%vucomisd\t{%1, %0|%0, %1}";
16774 else
16775 return "%vcomisd\t{%1, %0|%0, %1}";
16778 gcc_assert (STACK_TOP_P (cmp_op0));
16780 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
16782 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
16784 if (stack_top_dies)
16786 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
16787 return output_387_ffreep (operands, 1);
16789 else
16790 return "ftst\n\tfnstsw\t%0";
16793 if (STACK_REG_P (cmp_op1)
16794 && stack_top_dies
16795 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
16796 && REGNO (cmp_op1) != FIRST_STACK_REG)
16798 /* If both the top of the 387 stack and the other operand (also a
16799 stack register) die, then this must be a `fcompp' float
16800 compare.  */
16802 if (eflags_p)
16804 /* There is no double popping fcomi variant. Fortunately,
16805 eflags is immune from the fstp's cc clobbering. */
16806 if (unordered_p)
16807 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
16808 else
16809 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
16810 return output_387_ffreep (operands, 0);
16812 else
16814 if (unordered_p)
16815 return "fucompp\n\tfnstsw\t%0";
16816 else
16817 return "fcompp\n\tfnstsw\t%0";
16820 else
16822 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
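/* The table below is indexed by a 4-bit mask: bit 3 is eflags_p, bit 2
   is set when the second compare operand has integer mode, bit 1 is
   unordered_p and bit 0 is stack_top_dies; combinations that cannot
   occur are NULL.  */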
16824 static const char * const alt[16] =
16826 "fcom%Z2\t%y2\n\tfnstsw\t%0",
16827 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
16828 "fucom%Z2\t%y2\n\tfnstsw\t%0",
16829 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
16831 "ficom%Z2\t%y2\n\tfnstsw\t%0",
16832 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
16833 NULL,
16834 NULL,
16836 "fcomi\t{%y1, %0|%0, %y1}",
16837 "fcomip\t{%y1, %0|%0, %y1}",
16838 "fucomi\t{%y1, %0|%0, %y1}",
16839 "fucomip\t{%y1, %0|%0, %y1}",
16841 NULL,
16842 NULL,
16843 NULL,
16844 NULL
16847 int mask;
16848 const char *ret;
16850 mask = eflags_p << 3;
16851 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
16852 mask |= unordered_p << 1;
16853 mask |= stack_top_dies;
16855 gcc_assert (mask < 16);
16856 ret = alt[mask];
16857 gcc_assert (ret);
16859 return ret;
16863 void
16864 ix86_output_addr_vec_elt (FILE *file, int value)
16866 const char *directive = ASM_LONG;
16868 #ifdef ASM_QUAD
16869 if (TARGET_LP64)
16870 directive = ASM_QUAD;
16871 #else
16872 gcc_assert (!TARGET_64BIT);
16873 #endif
16875 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
16878 void
16879 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
16881 const char *directive = ASM_LONG;
16883 #ifdef ASM_QUAD
16884 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
16885 directive = ASM_QUAD;
16886 #else
16887 gcc_assert (!TARGET_64BIT);
16888 #endif
16889 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
16890 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
16891 fprintf (file, "%s%s%d-%s%d\n",
16892 directive, LPREFIX, value, LPREFIX, rel);
16893 else if (HAVE_AS_GOTOFF_IN_DATA)
16894 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
16895 #if TARGET_MACHO
16896 else if (TARGET_MACHO)
16898 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
16899 machopic_output_function_base_name (file);
16900 putc ('\n', file);
16902 #endif
16903 else
16904 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
16905 GOT_SYMBOL_NAME, LPREFIX, value);
16908 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
16909 for the target. */
16911 void
16912 ix86_expand_clear (rtx dest)
16914 rtx tmp;
16916 /* We play register width games, which are only valid after reload. */
16917 gcc_assert (reload_completed);
16919 /* Avoid HImode and its attendant prefix byte. */
16920 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
16921 dest = gen_rtx_REG (SImode, REGNO (dest));
16922 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
16924 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
16925 if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
16927 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16928 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
16931 emit_insn (tmp);
16934 /* X is an unchanging MEM. If it is a constant pool reference, return
16935 the constant pool rtx, else NULL. */
16938 maybe_get_pool_constant (rtx x)
16940 x = ix86_delegitimize_address (XEXP (x, 0));
16942 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
16943 return get_pool_constant (x);
16945 return NULL_RTX;
16948 void
16949 ix86_expand_move (enum machine_mode mode, rtx operands[])
16951 rtx op0, op1;
16952 enum tls_model model;
16954 op0 = operands[0];
16955 op1 = operands[1];
16957 if (GET_CODE (op1) == SYMBOL_REF)
16959 rtx tmp;
16961 model = SYMBOL_REF_TLS_MODEL (op1);
16962 if (model)
16964 op1 = legitimize_tls_address (op1, model, true);
16965 op1 = force_operand (op1, op0);
16966 if (op1 == op0)
16967 return;
16968 op1 = convert_to_mode (mode, op1, 1);
16970 else if ((tmp = legitimize_pe_coff_symbol (op1, false)) != NULL_RTX)
16971 op1 = tmp;
16973 else if (GET_CODE (op1) == CONST
16974 && GET_CODE (XEXP (op1, 0)) == PLUS
16975 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
16977 rtx addend = XEXP (XEXP (op1, 0), 1);
16978 rtx symbol = XEXP (XEXP (op1, 0), 0);
16979 rtx tmp;
16981 model = SYMBOL_REF_TLS_MODEL (symbol);
16982 if (model)
16983 tmp = legitimize_tls_address (symbol, model, true);
16984 else
16985 tmp = legitimize_pe_coff_symbol (symbol, true);
16987 if (tmp)
16989 tmp = force_operand (tmp, NULL);
16990 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
16991 op0, 1, OPTAB_DIRECT);
16992 if (tmp == op0)
16993 return;
16994 op1 = convert_to_mode (mode, tmp, 1);
16998 if ((flag_pic || MACHOPIC_INDIRECT)
16999 && symbolic_operand (op1, mode))
17001 if (TARGET_MACHO && !TARGET_64BIT)
17003 #if TARGET_MACHO
17004 /* dynamic-no-pic */
17005 if (MACHOPIC_INDIRECT)
17007 rtx temp = ((reload_in_progress
17008 || ((op0 && REG_P (op0))
17009 && mode == Pmode))
17010 ? op0 : gen_reg_rtx (Pmode));
17011 op1 = machopic_indirect_data_reference (op1, temp);
17012 if (MACHOPIC_PURE)
17013 op1 = machopic_legitimize_pic_address (op1, mode,
17014 temp == op1 ? 0 : temp);
17016 if (op0 != op1 && GET_CODE (op0) != MEM)
17018 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
17019 emit_insn (insn);
17020 return;
17022 if (GET_CODE (op0) == MEM)
17023 op1 = force_reg (Pmode, op1);
17024 else
17026 rtx temp = op0;
17027 if (GET_CODE (temp) != REG)
17028 temp = gen_reg_rtx (Pmode);
17029 temp = legitimize_pic_address (op1, temp);
17030 if (temp == op0)
17031 return;
17032 op1 = temp;
17034 /* dynamic-no-pic */
17035 #endif
17037 else
17039 if (MEM_P (op0))
17040 op1 = force_reg (mode, op1);
17041 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
17043 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
17044 op1 = legitimize_pic_address (op1, reg);
17045 if (op0 == op1)
17046 return;
17047 op1 = convert_to_mode (mode, op1, 1);
17051 else
17053 if (MEM_P (op0)
17054 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
17055 || !push_operand (op0, mode))
17056 && MEM_P (op1))
17057 op1 = force_reg (mode, op1);
17059 if (push_operand (op0, mode)
17060 && ! general_no_elim_operand (op1, mode))
17061 op1 = copy_to_mode_reg (mode, op1);
17063 /* Force large constants in 64bit compilation into register
17064 to get them CSEed. */
17065 if (can_create_pseudo_p ()
17066 && (mode == DImode) && TARGET_64BIT
17067 && immediate_operand (op1, mode)
17068 && !x86_64_zext_immediate_operand (op1, VOIDmode)
17069 && !register_operand (op0, mode)
17070 && optimize)
17071 op1 = copy_to_mode_reg (mode, op1);
17073 if (can_create_pseudo_p ()
17074 && FLOAT_MODE_P (mode)
17075 && GET_CODE (op1) == CONST_DOUBLE)
17077 /* If we are loading a floating point constant to a register,
17078 force the value to memory now, since we'll get better code
17079 out the back end. */
17081 op1 = validize_mem (force_const_mem (mode, op1));
17082 if (!register_operand (op0, mode))
17084 rtx temp = gen_reg_rtx (mode);
17085 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
17086 emit_move_insn (op0, temp);
17087 return;
17092 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17095 void
17096 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
17098 rtx op0 = operands[0], op1 = operands[1];
17099 unsigned int align = GET_MODE_ALIGNMENT (mode);
17101 if (push_operand (op0, VOIDmode))
17102 op0 = emit_move_resolve_push (mode, op0);
17104 /* Force constants other than zero into memory. We do not know how
17105 the instructions used to build constants modify the upper 64 bits
17106 of the register; once we have that information we may be able
17107 to handle some of them more efficiently. */
17108 if (can_create_pseudo_p ()
17109 && register_operand (op0, mode)
17110 && (CONSTANT_P (op1)
17111 || (GET_CODE (op1) == SUBREG
17112 && CONSTANT_P (SUBREG_REG (op1))))
17113 && !standard_sse_constant_p (op1))
17114 op1 = validize_mem (force_const_mem (mode, op1));
17116 /* We need to check memory alignment for SSE mode since attribute
17117 can make operands unaligned. */
17118 if (can_create_pseudo_p ()
17119 && SSE_REG_MODE_P (mode)
17120 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
17121 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
17123 rtx tmp[2];
17125 /* ix86_expand_vector_move_misalign() does not like constants ... */
17126 if (CONSTANT_P (op1)
17127 || (GET_CODE (op1) == SUBREG
17128 && CONSTANT_P (SUBREG_REG (op1))))
17129 op1 = validize_mem (force_const_mem (mode, op1));
17131 /* ... nor both arguments in memory. */
17132 if (!register_operand (op0, mode)
17133 && !register_operand (op1, mode))
17134 op1 = force_reg (mode, op1);
17136 tmp[0] = op0; tmp[1] = op1;
17137 ix86_expand_vector_move_misalign (mode, tmp);
17138 return;
17141 /* Make operand1 a register if it isn't already. */
17142 if (can_create_pseudo_p ()
17143 && !register_operand (op0, mode)
17144 && !register_operand (op1, mode))
17146 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
17147 return;
17150 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17153 /* Split 32-byte AVX unaligned load and store if needed. */
17155 static void
17156 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
17158 rtx m;
17159 rtx (*extract) (rtx, rtx, rtx);
17160 rtx (*load_unaligned) (rtx, rtx);
17161 rtx (*store_unaligned) (rtx, rtx);
17162 enum machine_mode mode;
17164 switch (GET_MODE (op0))
17166 default:
17167 gcc_unreachable ();
17168 case V32QImode:
17169 extract = gen_avx_vextractf128v32qi;
17170 load_unaligned = gen_avx_loaddquv32qi;
17171 store_unaligned = gen_avx_storedquv32qi;
17172 mode = V16QImode;
17173 break;
17174 case V8SFmode:
17175 extract = gen_avx_vextractf128v8sf;
17176 load_unaligned = gen_avx_loadups256;
17177 store_unaligned = gen_avx_storeups256;
17178 mode = V4SFmode;
17179 break;
17180 case V4DFmode:
17181 extract = gen_avx_vextractf128v4df;
17182 load_unaligned = gen_avx_loadupd256;
17183 store_unaligned = gen_avx_storeupd256;
17184 mode = V2DFmode;
17185 break;
17188 if (MEM_P (op1))
17190 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
17192 rtx r = gen_reg_rtx (mode);
17193 m = adjust_address (op1, mode, 0);
17194 emit_move_insn (r, m);
17195 m = adjust_address (op1, mode, 16);
17196 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
17197 emit_move_insn (op0, r);
17199 /* Normal *mov<mode>_internal pattern will handle
17200 unaligned loads just fine if misaligned_operand
17201 is true, and without the UNSPEC it can be combined
17202 with arithmetic instructions. */
17203 else if (misaligned_operand (op1, GET_MODE (op1)))
17204 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17205 else
17206 emit_insn (load_unaligned (op0, op1));
17208 else if (MEM_P (op0))
17210 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE)
17212 m = adjust_address (op0, mode, 0);
17213 emit_insn (extract (m, op1, const0_rtx));
17214 m = adjust_address (op0, mode, 16);
17215 emit_insn (extract (m, op1, const1_rtx));
17217 else
17218 emit_insn (store_unaligned (op0, op1));
17220 else
17221 gcc_unreachable ();
17224 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
17225 straight to ix86_expand_vector_move. */
17226 /* Code generation for scalar reg-reg moves of single and double precision data:
17227 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
17228 movaps reg, reg
17229 else
17230 movss reg, reg
17231 if (x86_sse_partial_reg_dependency == true)
17232 movapd reg, reg
17233 else
17234 movsd reg, reg
17236 Code generation for scalar loads of double precision data:
17237 if (x86_sse_split_regs == true)
17238 movlpd mem, reg (gas syntax)
17239 else
17240 movsd mem, reg
17242 Code generation for unaligned packed loads of single precision data
17243 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
17244 if (x86_sse_unaligned_move_optimal)
17245 movups mem, reg
17247 if (x86_sse_partial_reg_dependency == true)
17249 xorps reg, reg
17250 movlps mem, reg
17251 movhps mem+8, reg
17253 else
17255 movlps mem, reg
17256 movhps mem+8, reg
17259 Code generation for unaligned packed loads of double precision data
17260 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
17261 if (x86_sse_unaligned_move_optimal)
17262 movupd mem, reg
17264 if (x86_sse_split_regs == true)
17266 movlpd mem, reg
17267 movhpd mem+8, reg
17269 else
17271 movsd mem, reg
17272 movhpd mem+8, reg
17276 void
17277 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
17279 rtx op0, op1, orig_op0 = NULL_RTX, m;
17280 rtx (*load_unaligned) (rtx, rtx);
17281 rtx (*store_unaligned) (rtx, rtx);
17283 op0 = operands[0];
17284 op1 = operands[1];
17286 if (GET_MODE_SIZE (mode) == 64)
17288 switch (GET_MODE_CLASS (mode))
17290 case MODE_VECTOR_INT:
17291 case MODE_INT:
17292 if (GET_MODE (op0) != V16SImode)
17294 if (!MEM_P (op0))
17296 orig_op0 = op0;
17297 op0 = gen_reg_rtx (V16SImode);
17299 else
17300 op0 = gen_lowpart (V16SImode, op0);
17302 op1 = gen_lowpart (V16SImode, op1);
17303 /* FALLTHRU */
17305 case MODE_VECTOR_FLOAT:
17306 switch (GET_MODE (op0))
17308 default:
17309 gcc_unreachable ();
17310 case V16SImode:
17311 load_unaligned = gen_avx512f_loaddquv16si;
17312 store_unaligned = gen_avx512f_storedquv16si;
17313 break;
17314 case V16SFmode:
17315 load_unaligned = gen_avx512f_loadups512;
17316 store_unaligned = gen_avx512f_storeups512;
17317 break;
17318 case V8DFmode:
17319 load_unaligned = gen_avx512f_loadupd512;
17320 store_unaligned = gen_avx512f_storeupd512;
17321 break;
17324 if (MEM_P (op1))
17325 emit_insn (load_unaligned (op0, op1));
17326 else if (MEM_P (op0))
17327 emit_insn (store_unaligned (op0, op1));
17328 else
17329 gcc_unreachable ();
17330 if (orig_op0)
17331 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17332 break;
17334 default:
17335 gcc_unreachable ();
17338 return;
17341 if (TARGET_AVX
17342 && GET_MODE_SIZE (mode) == 32)
17344 switch (GET_MODE_CLASS (mode))
17346 case MODE_VECTOR_INT:
17347 case MODE_INT:
17348 if (GET_MODE (op0) != V32QImode)
17350 if (!MEM_P (op0))
17352 orig_op0 = op0;
17353 op0 = gen_reg_rtx (V32QImode);
17355 else
17356 op0 = gen_lowpart (V32QImode, op0);
17358 op1 = gen_lowpart (V32QImode, op1);
17359 /* FALLTHRU */
17361 case MODE_VECTOR_FLOAT:
17362 ix86_avx256_split_vector_move_misalign (op0, op1);
17363 if (orig_op0)
17364 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17365 break;
17367 default:
17368 gcc_unreachable ();
17371 return;
17374 if (MEM_P (op1))
17376 /* Normal *mov<mode>_internal pattern will handle
17377 unaligned loads just fine if misaligned_operand
17378 is true, and without the UNSPEC it can be combined
17379 with arithmetic instructions. */
17380 if (TARGET_AVX
17381 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
17382 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
17383 && misaligned_operand (op1, GET_MODE (op1)))
17384 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17385 /* ??? If we have typed data, then it would appear that using
17386 movdqu is the only way to get unaligned data loaded with
17387 integer type. */
17388 else if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17390 if (GET_MODE (op0) != V16QImode)
17392 orig_op0 = op0;
17393 op0 = gen_reg_rtx (V16QImode);
17395 op1 = gen_lowpart (V16QImode, op1);
17396 /* We will eventually emit movups based on insn attributes. */
17397 emit_insn (gen_sse2_loaddquv16qi (op0, op1));
17398 if (orig_op0)
17399 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17401 else if (TARGET_SSE2 && mode == V2DFmode)
17403 rtx zero;
17405 if (TARGET_AVX
17406 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17407 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17408 || optimize_insn_for_size_p ())
17410 /* We will eventually emit movups based on insn attributes. */
17411 emit_insn (gen_sse2_loadupd (op0, op1));
17412 return;
17415 /* When SSE registers are split into halves, we can avoid
17416 writing to the top half twice. */
17417 if (TARGET_SSE_SPLIT_REGS)
17419 emit_clobber (op0);
17420 zero = op0;
17422 else
17424 /* ??? Not sure about the best option for the Intel chips.
17425 The following would seem to satisfy; the register is
17426 entirely cleared, breaking the dependency chain. We
17427 then store to the upper half, with a dependency depth
17428 of one. A rumor has it that Intel recommends two movsd
17429 followed by an unpacklpd, but this is unconfirmed. And
17430 given that the dependency depth of the unpacklpd would
17431 still be one, I'm not sure why this would be better. */
17432 zero = CONST0_RTX (V2DFmode);
17435 m = adjust_address (op1, DFmode, 0);
17436 emit_insn (gen_sse2_loadlpd (op0, zero, m));
17437 m = adjust_address (op1, DFmode, 8);
17438 emit_insn (gen_sse2_loadhpd (op0, op0, m));
17440 else
17442 rtx t;
17444 if (TARGET_AVX
17445 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17446 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17447 || optimize_insn_for_size_p ())
17449 if (GET_MODE (op0) != V4SFmode)
17451 orig_op0 = op0;
17452 op0 = gen_reg_rtx (V4SFmode);
17454 op1 = gen_lowpart (V4SFmode, op1);
17455 emit_insn (gen_sse_loadups (op0, op1));
17456 if (orig_op0)
17457 emit_move_insn (orig_op0,
17458 gen_lowpart (GET_MODE (orig_op0), op0));
17459 return;
17462 if (mode != V4SFmode)
17463 t = gen_reg_rtx (V4SFmode);
17464 else
17465 t = op0;
17467 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
17468 emit_move_insn (t, CONST0_RTX (V4SFmode));
17469 else
17470 emit_clobber (t);
17472 m = adjust_address (op1, V2SFmode, 0);
17473 emit_insn (gen_sse_loadlps (t, t, m));
17474 m = adjust_address (op1, V2SFmode, 8);
17475 emit_insn (gen_sse_loadhps (t, t, m));
17476 if (mode != V4SFmode)
17477 emit_move_insn (op0, gen_lowpart (mode, t));
17480 else if (MEM_P (op0))
17482 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17484 op0 = gen_lowpart (V16QImode, op0);
17485 op1 = gen_lowpart (V16QImode, op1);
17486 /* We will eventually emit movups based on insn attributes. */
17487 emit_insn (gen_sse2_storedquv16qi (op0, op1));
17489 else if (TARGET_SSE2 && mode == V2DFmode)
17491 if (TARGET_AVX
17492 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17493 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17494 || optimize_insn_for_size_p ())
17495 /* We will eventually emit movups based on insn attributes. */
17496 emit_insn (gen_sse2_storeupd (op0, op1));
17497 else
17499 m = adjust_address (op0, DFmode, 0);
17500 emit_insn (gen_sse2_storelpd (m, op1));
17501 m = adjust_address (op0, DFmode, 8);
17502 emit_insn (gen_sse2_storehpd (m, op1));
17505 else
17507 if (mode != V4SFmode)
17508 op1 = gen_lowpart (V4SFmode, op1);
17510 if (TARGET_AVX
17511 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17512 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17513 || optimize_insn_for_size_p ())
17515 op0 = gen_lowpart (V4SFmode, op0);
17516 emit_insn (gen_sse_storeups (op0, op1));
17518 else
17520 m = adjust_address (op0, V2SFmode, 0);
17521 emit_insn (gen_sse_storelps (m, op1));
17522 m = adjust_address (op0, V2SFmode, 8);
17523 emit_insn (gen_sse_storehps (m, op1));
17527 else
17528 gcc_unreachable ();
17531 /* Helper function of ix86_fixup_binary_operands to canonicalize
17532 operand order. Returns true if the operands should be swapped. */
17534 static bool
17535 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
17536 rtx operands[])
17538 rtx dst = operands[0];
17539 rtx src1 = operands[1];
17540 rtx src2 = operands[2];
17542 /* If the operation is not commutative, we can't do anything. */
17543 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
17544 return false;
17546 /* Highest priority is that src1 should match dst. */
17547 if (rtx_equal_p (dst, src1))
17548 return false;
17549 if (rtx_equal_p (dst, src2))
17550 return true;
17552 /* Next highest priority is that immediate constants come second. */
17553 if (immediate_operand (src2, mode))
17554 return false;
17555 if (immediate_operand (src1, mode))
17556 return true;
17558 /* Lowest priority is that memory references should come second. */
17559 if (MEM_P (src2))
17560 return false;
17561 if (MEM_P (src1))
17562 return true;
17564 return false;
17568 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
17569 destination to use for the operation. If different from the true
17570 destination in operands[0], a copy operation will be required. */
17573 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
17574 rtx operands[])
17576 rtx dst = operands[0];
17577 rtx src1 = operands[1];
17578 rtx src2 = operands[2];
17580 /* Canonicalize operand order. */
17581 if (ix86_swap_binary_operands_p (code, mode, operands))
17583 rtx temp;
17585 /* It is invalid to swap operands of different modes. */
17586 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
17588 temp = src1;
17589 src1 = src2;
17590 src2 = temp;
17593 /* The two source operands cannot both be in memory.  */
17594 if (MEM_P (src1) && MEM_P (src2))
17596 /* Optimization: Only read from memory once. */
17597 if (rtx_equal_p (src1, src2))
17599 src2 = force_reg (mode, src2);
17600 src1 = src2;
17602 else if (rtx_equal_p (dst, src1))
17603 src2 = force_reg (mode, src2);
17604 else
17605 src1 = force_reg (mode, src1);
17608 /* If the destination is memory, and we do not have matching source
17609 operands, do things in registers. */
17610 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17611 dst = gen_reg_rtx (mode);
17613 /* Source 1 cannot be a constant. */
17614 if (CONSTANT_P (src1))
17615 src1 = force_reg (mode, src1);
17617 /* Source 1 cannot be a non-matching memory. */
17618 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17619 src1 = force_reg (mode, src1);
17621 /* Improve address combine. */
17622 if (code == PLUS
17623 && GET_MODE_CLASS (mode) == MODE_INT
17624 && MEM_P (src2))
17625 src2 = force_reg (mode, src2);
17627 operands[1] = src1;
17628 operands[2] = src2;
17629 return dst;
17632 /* Similarly, but assume that the destination has already been
17633 set up properly. */
17635 void
17636 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
17637 enum machine_mode mode, rtx operands[])
17639 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
17640 gcc_assert (dst == operands[0]);
17643 /* Attempt to expand a binary operator.  Make the expansion closer to the
17644 actual machine than just general_operand, which would allow 3 separate
17645 memory references (one output, two input) in a single insn.  */
17647 void
17648 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
17649 rtx operands[])
17651 rtx src1, src2, dst, op, clob;
17653 dst = ix86_fixup_binary_operands (code, mode, operands);
17654 src1 = operands[1];
17655 src2 = operands[2];
17657 /* Emit the instruction. */
17659 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
17660 if (reload_in_progress)
17662 /* Reload doesn't know about the flags register, and doesn't know that
17663 it doesn't want to clobber it. We can only do this with PLUS. */
17664 gcc_assert (code == PLUS);
17665 emit_insn (op);
17667 else if (reload_completed
17668 && code == PLUS
17669 && !rtx_equal_p (dst, src1))
17671 /* This is going to be an LEA; avoid splitting it later. */
17672 emit_insn (op);
17674 else
17676 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17677 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
17680 /* Fix up the destination if needed. */
17681 if (dst != operands[0])
17682 emit_move_insn (operands[0], dst);
17685 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
17686 the given OPERANDS. */
17688 void
17689 ix86_expand_vector_logical_operator (enum rtx_code code, enum machine_mode mode,
17690 rtx operands[])
17692 rtx op1 = NULL_RTX, op2 = NULL_RTX;
17693 if (GET_CODE (operands[1]) == SUBREG)
17695 op1 = operands[1];
17696 op2 = operands[2];
17698 else if (GET_CODE (operands[2]) == SUBREG)
17700 op1 = operands[2];
17701 op2 = operands[1];
17703 /* Optimize (__m128i) d | (__m128i) e and similar code, where d and e
17704 are float vectors, into a float vector logical insn.  In C/C++,
17705 without using intrinsics, there is no other way to express a vector
17706 logical operation on float vectors than to cast them temporarily
17707 to integer vectors.  */
17708 if (op1
17709 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17710 && ((GET_CODE (op2) == SUBREG || GET_CODE (op2) == CONST_VECTOR))
17711 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
17712 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
17713 && SUBREG_BYTE (op1) == 0
17714 && (GET_CODE (op2) == CONST_VECTOR
17715 || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
17716 && SUBREG_BYTE (op2) == 0))
17717 && can_create_pseudo_p ())
17719 rtx dst;
17720 switch (GET_MODE (SUBREG_REG (op1)))
17722 case V4SFmode:
17723 case V8SFmode:
17724 case V2DFmode:
17725 case V4DFmode:
17726 dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
17727 if (GET_CODE (op2) == CONST_VECTOR)
17729 op2 = gen_lowpart (GET_MODE (dst), op2);
17730 op2 = force_reg (GET_MODE (dst), op2);
17732 else
17734 op1 = operands[1];
17735 op2 = SUBREG_REG (operands[2]);
17736 if (!nonimmediate_operand (op2, GET_MODE (dst)))
17737 op2 = force_reg (GET_MODE (dst), op2);
17739 op1 = SUBREG_REG (op1);
17740 if (!nonimmediate_operand (op1, GET_MODE (dst)))
17741 op1 = force_reg (GET_MODE (dst), op1);
17742 emit_insn (gen_rtx_SET (VOIDmode, dst,
17743 gen_rtx_fmt_ee (code, GET_MODE (dst),
17744 op1, op2)));
17745 emit_move_insn (operands[0], gen_lowpart (mode, dst));
17746 return;
17747 default:
17748 break;
17751 if (!nonimmediate_operand (operands[1], mode))
17752 operands[1] = force_reg (mode, operands[1]);
17753 if (!nonimmediate_operand (operands[2], mode))
17754 operands[2] = force_reg (mode, operands[2]);
17755 ix86_fixup_binary_operands_no_copy (code, mode, operands);
17756 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
17757 gen_rtx_fmt_ee (code, mode, operands[1],
17758 operands[2])));
17761 /* Return TRUE or FALSE depending on whether the binary operator meets the
17762 appropriate constraints. */
17764 bool
17765 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
17766 rtx operands[3])
17768 rtx dst = operands[0];
17769 rtx src1 = operands[1];
17770 rtx src2 = operands[2];
17772 /* The two source operands cannot both be in memory.  */
17773 if (MEM_P (src1) && MEM_P (src2))
17774 return false;
17776 /* Canonicalize operand order for commutative operators. */
17777 if (ix86_swap_binary_operands_p (code, mode, operands))
17779 rtx temp = src1;
17780 src1 = src2;
17781 src2 = temp;
17784 /* If the destination is memory, we must have a matching source operand. */
17785 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17786 return false;
17788 /* Source 1 cannot be a constant. */
17789 if (CONSTANT_P (src1))
17790 return false;
17792 /* Source 1 cannot be a non-matching memory. */
17793 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17794 /* Support "andhi/andsi/anddi" as a zero-extending move. */
17795 return (code == AND
17796 && (mode == HImode
17797 || mode == SImode
17798 || (TARGET_64BIT && mode == DImode))
17799 && satisfies_constraint_L (src2));
17801 return true;
17804 /* Attempt to expand a unary operator.  Make the expansion closer to the
17805 actual machine than just general_operand, which would allow 2 separate
17806 memory references (one output, one input) in a single insn.  */
17808 void
17809 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
17810 rtx operands[])
17812 int matching_memory;
17813 rtx src, dst, op, clob;
17815 dst = operands[0];
17816 src = operands[1];
17818 /* If the destination is memory, and we do not have matching source
17819 operands, do things in registers. */
17820 matching_memory = 0;
17821 if (MEM_P (dst))
17823 if (rtx_equal_p (dst, src))
17824 matching_memory = 1;
17825 else
17826 dst = gen_reg_rtx (mode);
17829 /* When source operand is memory, destination must match. */
17830 if (MEM_P (src) && !matching_memory)
17831 src = force_reg (mode, src);
17833 /* Emit the instruction. */
17835 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
17836 if (reload_in_progress || code == NOT)
17838 /* Reload doesn't know about the flags register, and doesn't know that
17839 it doesn't want to clobber it. */
17840 gcc_assert (code == NOT);
17841 emit_insn (op);
17843 else
17845 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17846 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
17849 /* Fix up the destination if needed. */
17850 if (dst != operands[0])
17851 emit_move_insn (operands[0], dst);
17854 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
17855 divisor are within the range [0-255]. */
17857 void
17858 ix86_split_idivmod (enum machine_mode mode, rtx operands[],
17859 bool signed_p)
17861 rtx end_label, qimode_label;
17862 rtx insn, div, mod;
17863 rtx scratch, tmp0, tmp1, tmp2;
17864 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
17865 rtx (*gen_zero_extend) (rtx, rtx);
17866 rtx (*gen_test_ccno_1) (rtx, rtx);
17868 switch (mode)
17870 case SImode:
17871 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
17872 gen_test_ccno_1 = gen_testsi_ccno_1;
17873 gen_zero_extend = gen_zero_extendqisi2;
17874 break;
17875 case DImode:
17876 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
17877 gen_test_ccno_1 = gen_testdi_ccno_1;
17878 gen_zero_extend = gen_zero_extendqidi2;
17879 break;
17880 default:
17881 gcc_unreachable ();
17884 end_label = gen_label_rtx ();
17885 qimode_label = gen_label_rtx ();
17887 scratch = gen_reg_rtx (mode);
17889 /* Use 8-bit unsigned divmod if the dividend and divisor are within
17890 the range [0-255].  */
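/* OR the dividend and divisor together and test the result against
   ~0xff; the branch to qimode_label is taken only when both values
   fit in the low 8 bits.  */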
17891 emit_move_insn (scratch, operands[2]);
17892 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
17893 scratch, 1, OPTAB_DIRECT);
17894 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
17895 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
17896 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
17897 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
17898 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
17899 pc_rtx);
17900 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
17901 predict_jump (REG_BR_PROB_BASE * 50 / 100);
17902 JUMP_LABEL (insn) = qimode_label;
17904 /* Generate the original signed/unsigned divmod.  */
17905 div = gen_divmod4_1 (operands[0], operands[1],
17906 operands[2], operands[3]);
17907 emit_insn (div);
17909 /* Branch to the end. */
17910 emit_jump_insn (gen_jump (end_label));
17911 emit_barrier ();
17913 /* Generate 8bit unsigned divide. */
17914 emit_label (qimode_label);
17915 /* Don't use operands[0] for result of 8bit divide since not all
17916 registers support QImode ZERO_EXTRACT. */
17917 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
17918 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
17919 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
17920 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
17922 if (signed_p)
17924 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
17925 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
17927 else
17929 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
17930 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
17933 /* Extract remainder from AH. */
17934 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
17935 if (REG_P (operands[1]))
17936 insn = emit_move_insn (operands[1], tmp1);
17937 else
17939 /* Need a new scratch register since the old one has result
17940 of 8bit divide. */
17941 scratch = gen_reg_rtx (mode);
17942 emit_move_insn (scratch, tmp1);
17943 insn = emit_move_insn (operands[1], scratch);
17945 set_unique_reg_note (insn, REG_EQUAL, mod);
17947 /* Zero extend quotient from AL. */
17948 tmp1 = gen_lowpart (QImode, tmp0);
17949 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
17950 set_unique_reg_note (insn, REG_EQUAL, div);
17952 emit_label (end_label);
17955 /* Whether it is OK to emit CFI directives when emitting asm code. */
17957 bool
17958 ix86_emit_cfi ()
17960 return dwarf2out_do_cfi_asm ();
17963 #define LEA_MAX_STALL (3)
17964 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
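/* Distances below are counted in half-cycles, hence the search threshold
   is twice the maximum tolerated AGU stall.  */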
17966 /* Increase given DISTANCE in half-cycles according to
17967 dependencies between PREV and NEXT instructions.
17968 Add 1 half-cycle if there is no dependency and
17969 go to the next cycle if there is a dependency.  */
17971 static unsigned int
17972 increase_distance (rtx prev, rtx next, unsigned int distance)
17974 df_ref *use_rec;
17975 df_ref *def_rec;
17977 if (!prev || !next)
17978 return distance + (distance & 1) + 2;
17980 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
17981 return distance + 1;
17983 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
17984 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
17985 if (!DF_REF_IS_ARTIFICIAL (*def_rec)
17986 && DF_REF_REGNO (*use_rec) == DF_REF_REGNO (*def_rec))
17987 return distance + (distance & 1) + 2;
17989 return distance + 1;
17992 /* Function checks if instruction INSN defines register number
17993 REGNO1 or REGNO2. */
17995 static bool
17996 insn_defines_reg (unsigned int regno1, unsigned int regno2,
17997 rtx insn)
17999 df_ref *def_rec;
18001 for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++)
18002 if (DF_REF_REG_DEF_P (*def_rec)
18003 && !DF_REF_IS_ARTIFICIAL (*def_rec)
18004 && (regno1 == DF_REF_REGNO (*def_rec)
18005 || regno2 == DF_REF_REGNO (*def_rec)))
18007 return true;
18010 return false;
18013 /* Function checks if instruction INSN uses register number
18014 REGNO as a part of address expression. */
18016 static bool
18017 insn_uses_reg_mem (unsigned int regno, rtx insn)
18019 df_ref *use_rec;
18021 for (use_rec = DF_INSN_USES (insn); *use_rec; use_rec++)
18022 if (DF_REF_REG_MEM_P (*use_rec) && regno == DF_REF_REGNO (*use_rec))
18023 return true;
18025 return false;
18028 /* Search backward for non-agu definition of register number REGNO1
18029 or register number REGNO2 in basic block starting from instruction
18030 START up to head of basic block or instruction INSN.
18032 Function puts true value into *FOUND var if definition was found
18033 and false otherwise.
18035 Distance in half-cycles between START and found instruction or head
18036 of BB is added to DISTANCE and returned. */
18038 static int
18039 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
18040 rtx insn, int distance,
18041 rtx start, bool *found)
18043 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
18044 rtx prev = start;
18045 rtx next = NULL;
18047 *found = false;
18049 while (prev
18050 && prev != insn
18051 && distance < LEA_SEARCH_THRESHOLD)
18053 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
18055 distance = increase_distance (prev, next, distance);
18056 if (insn_defines_reg (regno1, regno2, prev))
18058 if (recog_memoized (prev) < 0
18059 || get_attr_type (prev) != TYPE_LEA)
18061 *found = true;
18062 return distance;
18066 next = prev;
18068 if (prev == BB_HEAD (bb))
18069 break;
18071 prev = PREV_INSN (prev);
18074 return distance;
18077 /* Search backward for non-agu definition of register number REGNO1
18078 or register number REGNO2 in INSN's basic block until
18079 1. Pass LEA_SEARCH_THRESHOLD instructions, or
18080 2. Reach neighbour BBs boundary, or
18081 3. Reach agu definition.
18082 Returns the distance between the non-agu definition point and INSN.
18083 If no definition point, returns -1. */
18085 static int
18086 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
18087 rtx insn)
18089 basic_block bb = BLOCK_FOR_INSN (insn);
18090 int distance = 0;
18091 bool found = false;
18093 if (insn != BB_HEAD (bb))
18094 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
18095 distance, PREV_INSN (insn),
18096 &found);
18098 if (!found && distance < LEA_SEARCH_THRESHOLD)
18100 edge e;
18101 edge_iterator ei;
18102 bool simple_loop = false;
18104 FOR_EACH_EDGE (e, ei, bb->preds)
18105 if (e->src == bb)
18107 simple_loop = true;
18108 break;
18111 if (simple_loop)
18112 distance = distance_non_agu_define_in_bb (regno1, regno2,
18113 insn, distance,
18114 BB_END (bb), &found);
18115 else
18117 int shortest_dist = -1;
18118 bool found_in_bb = false;
18120 FOR_EACH_EDGE (e, ei, bb->preds)
18122 int bb_dist
18123 = distance_non_agu_define_in_bb (regno1, regno2,
18124 insn, distance,
18125 BB_END (e->src),
18126 &found_in_bb);
18127 if (found_in_bb)
18129 if (shortest_dist < 0)
18130 shortest_dist = bb_dist;
18131 else if (bb_dist > 0)
18132 shortest_dist = MIN (bb_dist, shortest_dist);
18134 found = true;
18138 distance = shortest_dist;
18142 /* get_attr_type may modify recog data. We want to make sure
18143 that recog data is valid for instruction INSN, on which
18144 distance_non_agu_define is called. INSN is unchanged here. */
18145 extract_insn_cached (insn);
18147 if (!found)
18148 return -1;
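/* DISTANCE was accumulated in half-cycles; report whole cycles.  */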
18150 return distance >> 1;
18153 /* Return the distance in half-cycles between INSN and the next
18154 insn that uses register number REGNO in memory address added
18155 to DISTANCE. Return -1 if REGNO0 is set.
18157 Put true value into *FOUND if register usage was found and
18158 false otherwise.
18159 Put true value into *REDEFINED if register redefinition was
18160 found and false otherwise. */
18162 static int
18163 distance_agu_use_in_bb (unsigned int regno,
18164 rtx insn, int distance, rtx start,
18165 bool *found, bool *redefined)
18167 basic_block bb = NULL;
18168 rtx next = start;
18169 rtx prev = NULL;
18171 *found = false;
18172 *redefined = false;
18174 if (start != NULL_RTX)
18176 bb = BLOCK_FOR_INSN (start);
18177 if (start != BB_HEAD (bb))
18178 /* If insn and start belong to the same bb, set prev to insn,
18179 so the call to increase_distance will increase the distance
18180 between insns by 1. */
18181 prev = insn;
18184 while (next
18185 && next != insn
18186 && distance < LEA_SEARCH_THRESHOLD)
18188 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
18190 distance = increase_distance(prev, next, distance);
18191 if (insn_uses_reg_mem (regno, next))
18193 /* Return DISTANCE if OP0 is used in memory
18194 address in NEXT. */
18195 *found = true;
18196 return distance;
18199 if (insn_defines_reg (regno, INVALID_REGNUM, next))
18201 /* Return -1 if OP0 is set in NEXT. */
18202 *redefined = true;
18203 return -1;
18206 prev = next;
18209 if (next == BB_END (bb))
18210 break;
18212 next = NEXT_INSN (next);
18215 return distance;
18218 /* Return the distance between INSN and the next insn that uses
18219 register number REGNO0 in a memory address.  Return -1 if no such
18220 use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set.  */
18222 static int
18223 distance_agu_use (unsigned int regno0, rtx insn)
18225 basic_block bb = BLOCK_FOR_INSN (insn);
18226 int distance = 0;
18227 bool found = false;
18228 bool redefined = false;
18230 if (insn != BB_END (bb))
18231 distance = distance_agu_use_in_bb (regno0, insn, distance,
18232 NEXT_INSN (insn),
18233 &found, &redefined);
18235 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
18237 edge e;
18238 edge_iterator ei;
18239 bool simple_loop = false;
18241 FOR_EACH_EDGE (e, ei, bb->succs)
18242 if (e->dest == bb)
18244 simple_loop = true;
18245 break;
18248 if (simple_loop)
18249 distance = distance_agu_use_in_bb (regno0, insn,
18250 distance, BB_HEAD (bb),
18251 &found, &redefined);
18252 else
18254 int shortest_dist = -1;
18255 bool found_in_bb = false;
18256 bool redefined_in_bb = false;
18258 FOR_EACH_EDGE (e, ei, bb->succs)
18260 int bb_dist
18261 = distance_agu_use_in_bb (regno0, insn,
18262 distance, BB_HEAD (e->dest),
18263 &found_in_bb, &redefined_in_bb);
18264 if (found_in_bb)
18266 if (shortest_dist < 0)
18267 shortest_dist = bb_dist;
18268 else if (bb_dist > 0)
18269 shortest_dist = MIN (bb_dist, shortest_dist);
18271 found = true;
18275 distance = shortest_dist;
18279 if (!found || redefined)
18280 return -1;
18282 return distance >> 1;
18285 /* Define this macro to tune LEA priority vs ADD; it takes effect when
18286 there is a dilemma of choosing between LEA and ADD.
18287 Negative value: ADD is preferred over LEA
18288 Zero: Neutral
18289 Positive value: LEA is preferred over ADD.  */
18290 #define IX86_LEA_PRIORITY 0
18292 /* Return true if usage of lea INSN has performance advantage
18293 over a sequence of instructions. Instructions sequence has
18294 SPLIT_COST cycles higher latency than lea latency. */
18296 static bool
18297 ix86_lea_outperforms (rtx insn, unsigned int regno0, unsigned int regno1,
18298 unsigned int regno2, int split_cost, bool has_scale)
18300 int dist_define, dist_use;
18302 /* For Silvermont, if a 2-source or 3-source LEA is used for a
18303 non-destructive destination, or the scale factor is needed,
18304 the use of LEA is justified.  */
18305 if (TARGET_SILVERMONT || TARGET_INTEL)
18307 if (has_scale)
18308 return true;
18309 if (split_cost < 1)
18310 return false;
18311 if (regno0 == regno1 || regno0 == regno2)
18312 return false;
18313 return true;
18316 dist_define = distance_non_agu_define (regno1, regno2, insn);
18317 dist_use = distance_agu_use (regno0, insn);
18319 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
18321 /* If there is no non-AGU operand definition, no AGU
18322 operand usage, and the split cost is 0, then both the lea
18323 and non-lea variants have the same priority.  Currently
18324 we prefer lea for 64-bit code and non-lea for 32-bit
18325 code.  */
18326 if (dist_use < 0 && split_cost == 0)
18327 return TARGET_64BIT || IX86_LEA_PRIORITY;
18328 else
18329 return true;
18332 /* The longer the distance to the defining insn, the more preferable
18333 lea becomes.  Adjust the distance to take into account splitting
18334 cost and lea priority.  */
18335 dist_define += split_cost + IX86_LEA_PRIORITY;
18337 /* If there is no use in a memory address, then we just check
18338 that split cost exceeds AGU stall. */
18339 if (dist_use < 0)
18340 return dist_define > LEA_MAX_STALL;
18342 /* If this insn has both backward non-agu dependence and forward
18343 agu dependence, the one with short distance takes effect. */
18344 return dist_define >= dist_use;
18347 /* Return true if it is legal to clobber flags by INSN and
18348 false otherwise. */
18350 static bool
18351 ix86_ok_to_clobber_flags (rtx insn)
18353 basic_block bb = BLOCK_FOR_INSN (insn);
18354 df_ref *use;
18355 bitmap live;
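/* Scan forward to the end of the block: a use of the flags register
   before any new definition means the flags must be preserved, while
   a fresh definition makes the old value dead.  If neither is found,
   fall back to the block's live-out set.  */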
18357 while (insn)
18359 if (NONDEBUG_INSN_P (insn))
18361 for (use = DF_INSN_USES (insn); *use; use++)
18362 if (DF_REF_REG_USE_P (*use) && DF_REF_REGNO (*use) == FLAGS_REG)
18363 return false;
18365 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
18366 return true;
18369 if (insn == BB_END (bb))
18370 break;
18372 insn = NEXT_INSN (insn);
18375 live = df_get_live_out(bb);
18376 return !REGNO_REG_SET_P (live, FLAGS_REG);
18379 /* Return true if we need to split op0 = op1 + op2 into a sequence of
18380 move and add to avoid AGU stalls. */
18382 bool
18383 ix86_avoid_lea_for_add (rtx insn, rtx operands[])
18385 unsigned int regno0, regno1, regno2;
18387 /* Check if we need to optimize. */
18388 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18389 return false;
18391 /* Check it is correct to split here. */
18392 if (!ix86_ok_to_clobber_flags(insn))
18393 return false;
18395 regno0 = true_regnum (operands[0]);
18396 regno1 = true_regnum (operands[1]);
18397 regno2 = true_regnum (operands[2]);
18399 /* We need to split only adds with a non-destructive
18400 destination operand.  */
18401 if (regno0 == regno1 || regno0 == regno2)
18402 return false;
18403 else
18404 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
18407 /* Return true if we should emit lea instruction instead of mov
18408 instruction. */
18410 bool
18411 ix86_use_lea_for_mov (rtx insn, rtx operands[])
18413 unsigned int regno0, regno1;
18415 /* Check if we need to optimize. */
18416 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18417 return false;
18419 /* Use lea for reg to reg moves only. */
18420 if (!REG_P (operands[0]) || !REG_P (operands[1]))
18421 return false;
18423 regno0 = true_regnum (operands[0]);
18424 regno1 = true_regnum (operands[1]);
18426 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
18429 /* Return true if we need to split lea into a sequence of
18430 instructions to avoid AGU stalls. */
18432 bool
18433 ix86_avoid_lea_for_addr (rtx insn, rtx operands[])
18435 unsigned int regno0, regno1, regno2;
18436 int split_cost;
18437 struct ix86_address parts;
18438 int ok;
18440 /* Check we need to optimize. */
18441 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
18442 return false;
18444 /* The "at least two components" test below might not catch simple
18445 move or zero extension insns if parts.base is non-NULL and parts.disp
18446 is const0_rtx as the only components in the address, e.g. if the
18447 register is %rbp or %r13. As this test is much cheaper and moves or
18448 zero extensions are the common case, do this check first. */
18449 if (REG_P (operands[1])
18450 || (SImode_address_operand (operands[1], VOIDmode)
18451 && REG_P (XEXP (operands[1], 0))))
18452 return false;
18454 /* Check if it is OK to split here. */
18455 if (!ix86_ok_to_clobber_flags (insn))
18456 return false;
18458 ok = ix86_decompose_address (operands[1], &parts);
18459 gcc_assert (ok);
18461 /* There should be at least two components in the address. */
18462 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
18463 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
18464 return false;
18466 /* We should not split into add if non legitimate pic
18467 operand is used as displacement. */
18468 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
18469 return false;
18471 regno0 = true_regnum (operands[0]) ;
18472 regno1 = INVALID_REGNUM;
18473 regno2 = INVALID_REGNUM;
18475 if (parts.base)
18476 regno1 = true_regnum (parts.base);
18477 if (parts.index)
18478 regno2 = true_regnum (parts.index);
18480 split_cost = 0;
18482 /* Compute how many cycles we will add to the execution time
18483 if we split the lea into a sequence of instructions.  */
18484 if (parts.base || parts.index)
18486 /* Have to use a mov instruction if the non-destructive
18487 destination form is used.  */
18488 if (regno1 != regno0 && regno2 != regno0)
18489 split_cost += 1;
18491 /* Have to add index to base if both exist. */
18492 if (parts.base && parts.index)
18493 split_cost += 1;
18495 /* Have to use shift and adds if scale is 2 or greater. */
18496 if (parts.scale > 1)
18498 if (regno0 != regno1)
18499 split_cost += 1;
18500 else if (regno2 == regno0)
18501 split_cost += 4;
18502 else
18503 split_cost += parts.scale;
18506 /* Have to use add instruction with immediate if
18507 disp is non zero. */
18508 if (parts.disp && parts.disp != const0_rtx)
18509 split_cost += 1;
18511 /* Subtract the price of lea. */
18512 split_cost -= 1;
18515 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
18516 parts.scale > 1);
18519 /* Emit x86 binary operand CODE in mode MODE, where the first operand
18520 matches destination. RTX includes clobber of FLAGS_REG. */
18522 static void
18523 ix86_emit_binop (enum rtx_code code, enum machine_mode mode,
18524 rtx dst, rtx src)
18526 rtx op, clob;
18528 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src));
18529 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18531 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18534 /* Return true if regno1 def is nearest to the insn. */
18536 static bool
18537 find_nearest_reg_def (rtx insn, int regno1, int regno2)
18539 rtx prev = insn;
18540 rtx start = BB_HEAD (BLOCK_FOR_INSN (insn));
18542 if (insn == start)
18543 return false;
18544 while (prev && prev != start)
18546 if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
18548 prev = PREV_INSN (prev);
18549 continue;
18551 if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
18552 return true;
18553 else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
18554 return false;
18555 prev = PREV_INSN (prev);
18558 /* Neither of the regs is defined in the bb.  */
18559 return false;
18562 /* Split lea instructions into a sequence of instructions
18563 which are executed on ALU to avoid AGU stalls.
18564 It is assumed that it is allowed to clobber flags register
18565 at lea position. */
18567 void
18568 ix86_split_lea_for_addr (rtx insn, rtx operands[], enum machine_mode mode)
18570 unsigned int regno0, regno1, regno2;
18571 struct ix86_address parts;
18572 rtx target, tmp;
18573 int ok, adds;
18575 ok = ix86_decompose_address (operands[1], &parts);
18576 gcc_assert (ok);
18578 target = gen_lowpart (mode, operands[0]);
18580 regno0 = true_regnum (target);
18581 regno1 = INVALID_REGNUM;
18582 regno2 = INVALID_REGNUM;
18584 if (parts.base)
18586 parts.base = gen_lowpart (mode, parts.base);
18587 regno1 = true_regnum (parts.base);
18590 if (parts.index)
18592 parts.index = gen_lowpart (mode, parts.index);
18593 regno2 = true_regnum (parts.index);
18596 if (parts.disp)
18597 parts.disp = gen_lowpart (mode, parts.disp);
18599 if (parts.scale > 1)
18601 /* Case r1 = r1 + ... */
18602 if (regno1 == regno0)
18604 /* If we have a case r1 = r1 + C * r2 then we
18605 should use multiplication which is very
18606 expensive. Assume cost model is wrong if we
18607 have such case here. */
18608 gcc_assert (regno2 != regno0);
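/* Emit PARTS.SCALE separate additions of the index; a shift cannot be
   used here because the target already holds the base.  */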
18610 for (adds = parts.scale; adds > 0; adds--)
18611 ix86_emit_binop (PLUS, mode, target, parts.index);
18613 else
18615 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
18616 if (regno0 != regno2)
18617 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18619 /* Use shift for scaling. */
18620 ix86_emit_binop (ASHIFT, mode, target,
18621 GEN_INT (exact_log2 (parts.scale)));
18623 if (parts.base)
18624 ix86_emit_binop (PLUS, mode, target, parts.base);
18626 if (parts.disp && parts.disp != const0_rtx)
18627 ix86_emit_binop (PLUS, mode, target, parts.disp);
18630 else if (!parts.base && !parts.index)
18632 gcc_assert(parts.disp);
18633 emit_insn (gen_rtx_SET (VOIDmode, target, parts.disp));
18635 else
18637 if (!parts.base)
18639 if (regno0 != regno2)
18640 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18642 else if (!parts.index)
18644 if (regno0 != regno1)
18645 emit_insn (gen_rtx_SET (VOIDmode, target, parts.base));
18647 else
18649 if (regno0 == regno1)
18650 tmp = parts.index;
18651 else if (regno0 == regno2)
18652 tmp = parts.base;
18653 else
18655 rtx tmp1;
18657 /* Find better operand for SET instruction, depending
18658 on which definition is farther from the insn. */
18659 if (find_nearest_reg_def (insn, regno1, regno2))
18660 tmp = parts.index, tmp1 = parts.base;
18661 else
18662 tmp = parts.base, tmp1 = parts.index;
18664 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18666 if (parts.disp && parts.disp != const0_rtx)
18667 ix86_emit_binop (PLUS, mode, target, parts.disp);
18669 ix86_emit_binop (PLUS, mode, target, tmp1);
18670 return;
18673 ix86_emit_binop (PLUS, mode, target, tmp);
18676 if (parts.disp && parts.disp != const0_rtx)
18677 ix86_emit_binop (PLUS, mode, target, parts.disp);
18681 /* Return true if it is ok to optimize an ADD operation to a LEA
18682 operation to avoid flag register consumption. For most processors,
18683 ADD is faster than LEA. For processors like BONNELL, if the
18684 destination register of the LEA holds an actual address which will be
18685 used soon, LEA is better; otherwise ADD is better. */
18687 bool
18688 ix86_lea_for_add_ok (rtx insn, rtx operands[])
18690 unsigned int regno0 = true_regnum (operands[0]);
18691 unsigned int regno1 = true_regnum (operands[1]);
18692 unsigned int regno2 = true_regnum (operands[2]);
18694 /* If a = b + c, (a!=b && a!=c), must use lea form. */
18695 if (regno0 != regno1 && regno0 != regno2)
18696 return true;
18698 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18699 return false;
18701 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
18704 /* Return true if destination reg of SET_BODY is shift count of
18705 USE_BODY. */
18707 static bool
18708 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
18710 rtx set_dest;
18711 rtx shift_rtx;
18712 int i;
18714 /* Retrieve destination of SET_BODY. */
18715 switch (GET_CODE (set_body))
18717 case SET:
18718 set_dest = SET_DEST (set_body);
18719 if (!set_dest || !REG_P (set_dest))
18720 return false;
18721 break;
18722 case PARALLEL:
18723 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
18724 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
18725 use_body))
18726 return true;
18727 default:
18728 return false;
18729 break;
18732 /* Retrieve shift count of USE_BODY. */
18733 switch (GET_CODE (use_body))
18735 case SET:
18736 shift_rtx = XEXP (use_body, 1);
18737 break;
18738 case PARALLEL:
18739 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
18740 if (ix86_dep_by_shift_count_body (set_body,
18741 XVECEXP (use_body, 0, i)))
18742 return true;
18743 default:
18744 return false;
18745 break;
18748 if (shift_rtx
18749 && (GET_CODE (shift_rtx) == ASHIFT
18750 || GET_CODE (shift_rtx) == LSHIFTRT
18751 || GET_CODE (shift_rtx) == ASHIFTRT
18752 || GET_CODE (shift_rtx) == ROTATE
18753 || GET_CODE (shift_rtx) == ROTATERT))
18755 rtx shift_count = XEXP (shift_rtx, 1);
18757 /* Return true if shift count is dest of SET_BODY. */
18758 if (REG_P (shift_count))
18760 /* Add this check since the function can be invoked before
18761 register allocation by the pre-reload scheduler. */
18762 if (reload_completed
18763 && true_regnum (set_dest) == true_regnum (shift_count))
18764 return true;
18765 else if (REGNO (set_dest) == REGNO (shift_count))
18766 return true;
18770 return false;
18773 /* Return true if destination reg of SET_INSN is shift count of
18774 USE_INSN. */
18776 bool
18777 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
18779 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
18780 PATTERN (use_insn));
18783 /* Return TRUE or FALSE depending on whether the unary operator meets the
18784 appropriate constraints. */
18786 bool
18787 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
18788 enum machine_mode mode ATTRIBUTE_UNUSED,
18789 rtx operands[2])
18791 /* If one of operands is memory, source and destination must match. */
18792 if ((MEM_P (operands[0])
18793 || MEM_P (operands[1]))
18794 && ! rtx_equal_p (operands[0], operands[1]))
18795 return false;
18796 return true;
18799 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
18800 are ok, keeping in mind the possible movddup alternative. */
18802 bool
18803 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
18805 if (MEM_P (operands[0]))
18806 return rtx_equal_p (operands[0], operands[1 + high]);
18807 if (MEM_P (operands[1]) && MEM_P (operands[2]))
18808 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
18809 return true;
18812 /* Post-reload splitter for converting an SF or DFmode value in an
18813 SSE register into an unsigned SImode. */
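/* Rough outline (illustrative): lanes with value >= 0x1p31 have 0x1p31
   subtracted before the signed truncating conversion, and the dropped bit is
   restored afterwards by xoring the integer result with the comparison mask
   shifted into the sign-bit position.  */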
18815 void
18816 ix86_split_convert_uns_si_sse (rtx operands[])
18818 enum machine_mode vecmode;
18819 rtx value, large, zero_or_two31, input, two31, x;
18821 large = operands[1];
18822 zero_or_two31 = operands[2];
18823 input = operands[3];
18824 two31 = operands[4];
18825 vecmode = GET_MODE (large);
18826 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
18828 /* Load up the value into the low element. We must ensure that the other
18829 elements are valid floats -- zero is the easiest such value. */
18830 if (MEM_P (input))
18832 if (vecmode == V4SFmode)
18833 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
18834 else
18835 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
18837 else
18839 input = gen_rtx_REG (vecmode, REGNO (input));
18840 emit_move_insn (value, CONST0_RTX (vecmode));
18841 if (vecmode == V4SFmode)
18842 emit_insn (gen_sse_movss (value, value, input));
18843 else
18844 emit_insn (gen_sse2_movsd (value, value, input));
18847 emit_move_insn (large, two31);
18848 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
18850 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
18851 emit_insn (gen_rtx_SET (VOIDmode, large, x));
18853 x = gen_rtx_AND (vecmode, zero_or_two31, large);
18854 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
18856 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
18857 emit_insn (gen_rtx_SET (VOIDmode, value, x));
18859 large = gen_rtx_REG (V4SImode, REGNO (large));
18860 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
18862 x = gen_rtx_REG (V4SImode, REGNO (value));
18863 if (vecmode == V4SFmode)
18864 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
18865 else
18866 emit_insn (gen_sse2_cvttpd2dq (x, value));
18867 value = x;
18869 emit_insn (gen_xorv4si3 (value, value, large));
18872 /* Convert an unsigned DImode value into a DFmode, using only SSE.
18873 Expects the 64-bit DImode to be supplied in a pair of integral
18874 registers. Requires SSE2; will use SSE3 if available. For x86_32,
18875 -mfpmath=sse, !optimize_size only. */
18877 void
18878 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
18880 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
18881 rtx int_xmm, fp_xmm;
18882 rtx biases, exponents;
18883 rtx x;
18885 int_xmm = gen_reg_rtx (V4SImode);
18886 if (TARGET_INTER_UNIT_MOVES_TO_VEC)
18887 emit_insn (gen_movdi_to_sse (int_xmm, input));
18888 else if (TARGET_SSE_SPLIT_REGS)
18890 emit_clobber (int_xmm);
18891 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
18893 else
18895 x = gen_reg_rtx (V2DImode);
18896 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
18897 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
18900 x = gen_rtx_CONST_VECTOR (V4SImode,
18901 gen_rtvec (4, GEN_INT (0x43300000UL),
18902 GEN_INT (0x45300000UL),
18903 const0_rtx, const0_rtx));
18904 exponents = validize_mem (force_const_mem (V4SImode, x));
18906 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
18907 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
18909 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
18910 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
18911 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
18912 (0x1.0p84 + double(fp_value_hi_xmm)).
18913 Note these exponents differ by 32. */
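/* Worked example (illustrative only): for input 0x0000000500000003 we get
   lo = 3 and hi = 5, so the two doubles are 0x1.0p52 + 3 and 0x1.0p84 + 5*2^32;
   after the bias subtraction and the add below, the result is 3 + 5*2^32,
   i.e. the original unsigned value as a DFmode number.  */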
18915 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
18917 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
18918 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
18919 real_ldexp (&bias_lo_rvt, &dconst1, 52);
18920 real_ldexp (&bias_hi_rvt, &dconst1, 84);
18921 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
18922 x = const_double_from_real_value (bias_hi_rvt, DFmode);
18923 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
18924 biases = validize_mem (force_const_mem (V2DFmode, biases));
18925 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
18927 /* Add the upper and lower DFmode values together. */
18928 if (TARGET_SSE3)
18929 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
18930 else
18932 x = copy_to_mode_reg (V2DFmode, fp_xmm);
18933 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
18934 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
18937 ix86_expand_vector_extract (false, target, fp_xmm, 0);
18940 /* Not used, but eases macroization of patterns. */
18941 void
18942 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
18943 rtx input ATTRIBUTE_UNUSED)
18945 gcc_unreachable ();
18948 /* Convert an unsigned SImode value into a DFmode. Only currently used
18949 for SSE, but applicable anywhere. */
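/* Sketch of the idea (illustrative): the unsigned input is biased by -2^31 so
   it fits in a signed SImode, converted with a signed int->double conversion,
   and then 2^31 is added back as a DFmode constant; the result is exact since
   DFmode has more than 32 bits of mantissa.  */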
18951 void
18952 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
18954 REAL_VALUE_TYPE TWO31r;
18955 rtx x, fp;
18957 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
18958 NULL, 1, OPTAB_DIRECT);
18960 fp = gen_reg_rtx (DFmode);
18961 emit_insn (gen_floatsidf2 (fp, x));
18963 real_ldexp (&TWO31r, &dconst1, 31);
18964 x = const_double_from_real_value (TWO31r, DFmode);
18966 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
18967 if (x != target)
18968 emit_move_insn (target, x);
18971 /* Convert a signed DImode value into a DFmode. Only used for SSE in
18972 32-bit mode; otherwise we have a direct convert instruction. */
18974 void
18975 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
18977 REAL_VALUE_TYPE TWO32r;
18978 rtx fp_lo, fp_hi, x;
18980 fp_lo = gen_reg_rtx (DFmode);
18981 fp_hi = gen_reg_rtx (DFmode);
18983 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
18985 real_ldexp (&TWO32r, &dconst1, 32);
18986 x = const_double_from_real_value (TWO32r, DFmode);
18987 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
18989 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
18991 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
18992 0, OPTAB_DIRECT);
18993 if (x != target)
18994 emit_move_insn (target, x);
18997 /* Convert an unsigned SImode value into a SFmode, using only SSE.
18998 For x86_32, -mfpmath=sse, !optimize_size only. */
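/* Rough outline (illustrative): the input is split as u = hi * 2^16 + lo with
   hi = u >> 16 and lo = u & 0xffff; both halves convert exactly to SFmode, so
   the result is computed as (float) hi * 0x1.0p16 + (float) lo with a single
   final rounding.  */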
18999 void
19000 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
19002 REAL_VALUE_TYPE ONE16r;
19003 rtx fp_hi, fp_lo, int_hi, int_lo, x;
19005 real_ldexp (&ONE16r, &dconst1, 16);
19006 x = const_double_from_real_value (ONE16r, SFmode);
19007 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT (0xffff),
19008 NULL, 0, OPTAB_DIRECT);
19009 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT (16),
19010 NULL, 0, OPTAB_DIRECT);
19011 fp_hi = gen_reg_rtx (SFmode);
19012 fp_lo = gen_reg_rtx (SFmode);
19013 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
19014 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
19015 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
19016 0, OPTAB_DIRECT);
19017 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
19018 0, OPTAB_DIRECT);
19019 if (!rtx_equal_p (target, fp_hi))
19020 emit_move_insn (target, fp_hi);
19023 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
19024 a vector of unsigned ints VAL to vector of floats TARGET. */
19026 void
19027 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
19029 rtx tmp[8];
19030 REAL_VALUE_TYPE TWO16r;
19031 enum machine_mode intmode = GET_MODE (val);
19032 enum machine_mode fltmode = GET_MODE (target);
19033 rtx (*cvt) (rtx, rtx);
19035 if (intmode == V4SImode)
19036 cvt = gen_floatv4siv4sf2;
19037 else
19038 cvt = gen_floatv8siv8sf2;
19039 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
19040 tmp[0] = force_reg (intmode, tmp[0]);
19041 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
19042 OPTAB_DIRECT);
19043 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
19044 NULL_RTX, 1, OPTAB_DIRECT);
19045 tmp[3] = gen_reg_rtx (fltmode);
19046 emit_insn (cvt (tmp[3], tmp[1]));
19047 tmp[4] = gen_reg_rtx (fltmode);
19048 emit_insn (cvt (tmp[4], tmp[2]));
19049 real_ldexp (&TWO16r, &dconst1, 16);
19050 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
19051 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
19052 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
19053 OPTAB_DIRECT);
19054 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
19055 OPTAB_DIRECT);
19056 if (tmp[7] != target)
19057 emit_move_insn (target, tmp[7]);
19060 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
19061 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
19062 This is done by doing just signed conversion if < 0x1p31, and otherwise by
19063 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
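/* Illustrative example: for a lane holding 3.5e9 (>= 0x1p31) the mask compare
   selects 0x1p31, which is subtracted before the signed conversion, and the
   sign bit returned through *XORP restores the unsigned result; for a lane
   holding 5.0 the mask is zero and the value converts unchanged.  */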
19066 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
19068 REAL_VALUE_TYPE TWO31r;
19069 rtx two31r, tmp[4];
19070 enum machine_mode mode = GET_MODE (val);
19071 enum machine_mode scalarmode = GET_MODE_INNER (mode);
19072 enum machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
19073 rtx (*cmp) (rtx, rtx, rtx, rtx);
19074 int i;
19076 for (i = 0; i < 3; i++)
19077 tmp[i] = gen_reg_rtx (mode);
19078 real_ldexp (&TWO31r, &dconst1, 31);
19079 two31r = const_double_from_real_value (TWO31r, scalarmode);
19080 two31r = ix86_build_const_vector (mode, 1, two31r);
19081 two31r = force_reg (mode, two31r);
19082 switch (mode)
19084 case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
19085 case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
19086 case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
19087 case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
19088 default: gcc_unreachable ();
19090 tmp[3] = gen_rtx_LE (mode, two31r, val);
19091 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
19092 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
19093 0, OPTAB_DIRECT);
19094 if (intmode == V4SImode || TARGET_AVX2)
19095 *xorp = expand_simple_binop (intmode, ASHIFT,
19096 gen_lowpart (intmode, tmp[0]),
19097 GEN_INT (31), NULL_RTX, 0,
19098 OPTAB_DIRECT);
19099 else
19101 rtx two31 = GEN_INT ((unsigned HOST_WIDE_INT) 1 << 31);
19102 two31 = ix86_build_const_vector (intmode, 1, two31);
19103 *xorp = expand_simple_binop (intmode, AND,
19104 gen_lowpart (intmode, tmp[0]),
19105 two31, NULL_RTX, 0,
19106 OPTAB_DIRECT);
19108 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
19109 0, OPTAB_DIRECT);
19112 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
19113 then replicate the value for all elements of the vector
19114 register. */
19117 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
19119 int i, n_elt;
19120 rtvec v;
19121 enum machine_mode scalar_mode;
19123 switch (mode)
19125 case V64QImode:
19126 case V32QImode:
19127 case V16QImode:
19128 case V32HImode:
19129 case V16HImode:
19130 case V8HImode:
19131 case V16SImode:
19132 case V8SImode:
19133 case V4SImode:
19134 case V8DImode:
19135 case V4DImode:
19136 case V2DImode:
19137 gcc_assert (vect);
19138 case V16SFmode:
19139 case V8SFmode:
19140 case V4SFmode:
19141 case V8DFmode:
19142 case V4DFmode:
19143 case V2DFmode:
19144 n_elt = GET_MODE_NUNITS (mode);
19145 v = rtvec_alloc (n_elt);
19146 scalar_mode = GET_MODE_INNER (mode);
19148 RTVEC_ELT (v, 0) = value;
19150 for (i = 1; i < n_elt; ++i)
19151 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
19153 return gen_rtx_CONST_VECTOR (mode, v);
19155 default:
19156 gcc_unreachable ();
19160 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
19161 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
19162 for an SSE register. If VECT is true, then replicate the mask for
19163 all elements of the vector register. If INVERT is true, then create
19164 a mask excluding the sign bit. */
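/* For example (illustrative): for V4SFmode with VECT set this yields the
   constant { 0x80000000, 0x80000000, 0x80000000, 0x80000000 }, and with
   INVERT set { 0x7fffffff, ... }, as used by andps/andnps/xorps sequences.  */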
19167 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
19169 enum machine_mode vec_mode, imode;
19170 HOST_WIDE_INT hi, lo;
19171 int shift = 63;
19172 rtx v;
19173 rtx mask;
19175 /* Find the sign bit, sign extended to 2*HWI. */
19176 switch (mode)
19178 case V16SImode:
19179 case V16SFmode:
19180 case V8SImode:
19181 case V4SImode:
19182 case V8SFmode:
19183 case V4SFmode:
19184 vec_mode = mode;
19185 mode = GET_MODE_INNER (mode);
19186 imode = SImode;
19187 lo = 0x80000000, hi = lo < 0;
19188 break;
19190 case V8DImode:
19191 case V4DImode:
19192 case V2DImode:
19193 case V8DFmode:
19194 case V4DFmode:
19195 case V2DFmode:
19196 vec_mode = mode;
19197 mode = GET_MODE_INNER (mode);
19198 imode = DImode;
19199 if (HOST_BITS_PER_WIDE_INT >= 64)
19200 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
19201 else
19202 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
19203 break;
19205 case TImode:
19206 case TFmode:
19207 vec_mode = VOIDmode;
19208 if (HOST_BITS_PER_WIDE_INT >= 64)
19210 imode = TImode;
19211 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
19213 else
19215 rtvec vec;
19217 imode = DImode;
19218 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
19220 if (invert)
19222 lo = ~lo, hi = ~hi;
19223 v = constm1_rtx;
19225 else
19226 v = const0_rtx;
19228 mask = immed_double_const (lo, hi, imode);
19230 vec = gen_rtvec (2, v, mask);
19231 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
19232 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
19234 return v;
19236 break;
19238 default:
19239 gcc_unreachable ();
19242 if (invert)
19243 lo = ~lo, hi = ~hi;
19245 /* Force this value into the low part of a fp vector constant. */
19246 mask = immed_double_const (lo, hi, imode);
19247 mask = gen_lowpart (mode, mask);
19249 if (vec_mode == VOIDmode)
19250 return force_reg (mode, mask);
19252 v = ix86_build_const_vector (vec_mode, vect, mask);
19253 return force_reg (vec_mode, v);
19256 /* Generate code for floating point ABS or NEG. */
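/* Illustrative note: with SSE math, NEG is emitted as an XOR with the
   sign-bit mask and ABS as an AND with the inverted mask; the expansion below
   only attaches the mask via a USE in a PARALLEL and lets the insn patterns
   pick the actual bitwise operation.  */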
19258 void
19259 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
19260 rtx operands[])
19262 rtx mask, set, dst, src;
19263 bool use_sse = false;
19264 bool vector_mode = VECTOR_MODE_P (mode);
19265 enum machine_mode vmode = mode;
19267 if (vector_mode)
19268 use_sse = true;
19269 else if (mode == TFmode)
19270 use_sse = true;
19271 else if (TARGET_SSE_MATH)
19273 use_sse = SSE_FLOAT_MODE_P (mode);
19274 if (mode == SFmode)
19275 vmode = V4SFmode;
19276 else if (mode == DFmode)
19277 vmode = V2DFmode;
19280 /* NEG and ABS performed with SSE use bitwise mask operations.
19281 Create the appropriate mask now. */
19282 if (use_sse)
19283 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
19284 else
19285 mask = NULL_RTX;
19287 dst = operands[0];
19288 src = operands[1];
19290 set = gen_rtx_fmt_e (code, mode, src);
19291 set = gen_rtx_SET (VOIDmode, dst, set);
19293 if (mask)
19295 rtx use, clob;
19296 rtvec par;
19298 use = gen_rtx_USE (VOIDmode, mask);
19299 if (vector_mode)
19300 par = gen_rtvec (2, set, use);
19301 else
19303 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19304 par = gen_rtvec (3, set, use, clob);
19306 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
19308 else
19309 emit_insn (set);
19312 /* Expand a copysign operation. Special case operand 0 being a constant. */
19314 void
19315 ix86_expand_copysign (rtx operands[])
19317 enum machine_mode mode, vmode;
19318 rtx dest, op0, op1, mask, nmask;
19320 dest = operands[0];
19321 op0 = operands[1];
19322 op1 = operands[2];
19324 mode = GET_MODE (dest);
19326 if (mode == SFmode)
19327 vmode = V4SFmode;
19328 else if (mode == DFmode)
19329 vmode = V2DFmode;
19330 else
19331 vmode = mode;
19333 if (GET_CODE (op0) == CONST_DOUBLE)
19335 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
19337 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
19338 op0 = simplify_unary_operation (ABS, mode, op0, mode);
19340 if (mode == SFmode || mode == DFmode)
19342 if (op0 == CONST0_RTX (mode))
19343 op0 = CONST0_RTX (vmode);
19344 else
19346 rtx v = ix86_build_const_vector (vmode, false, op0);
19348 op0 = force_reg (vmode, v);
19351 else if (op0 != CONST0_RTX (mode))
19352 op0 = force_reg (mode, op0);
19354 mask = ix86_build_signbit_mask (vmode, 0, 0);
19356 if (mode == SFmode)
19357 copysign_insn = gen_copysignsf3_const;
19358 else if (mode == DFmode)
19359 copysign_insn = gen_copysigndf3_const;
19360 else
19361 copysign_insn = gen_copysigntf3_const;
19363 emit_insn (copysign_insn (dest, op0, op1, mask));
19365 else
19367 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
19369 nmask = ix86_build_signbit_mask (vmode, 0, 1);
19370 mask = ix86_build_signbit_mask (vmode, 0, 0);
19372 if (mode == SFmode)
19373 copysign_insn = gen_copysignsf3_var;
19374 else if (mode == DFmode)
19375 copysign_insn = gen_copysigndf3_var;
19376 else
19377 copysign_insn = gen_copysigntf3_var;
19379 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
19383 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
19384 be a constant, and so has already been expanded into a vector constant. */
19386 void
19387 ix86_split_copysign_const (rtx operands[])
19389 enum machine_mode mode, vmode;
19390 rtx dest, op0, mask, x;
19392 dest = operands[0];
19393 op0 = operands[1];
19394 mask = operands[3];
19396 mode = GET_MODE (dest);
19397 vmode = GET_MODE (mask);
19399 dest = simplify_gen_subreg (vmode, dest, mode, 0);
19400 x = gen_rtx_AND (vmode, dest, mask);
19401 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19403 if (op0 != CONST0_RTX (vmode))
19405 x = gen_rtx_IOR (vmode, dest, op0);
19406 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19410 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
19411 so we have to do two masks. */
19413 void
19414 ix86_split_copysign_var (rtx operands[])
19416 enum machine_mode mode, vmode;
19417 rtx dest, scratch, op0, op1, mask, nmask, x;
19419 dest = operands[0];
19420 scratch = operands[1];
19421 op0 = operands[2];
19422 op1 = operands[3];
19423 nmask = operands[4];
19424 mask = operands[5];
19426 mode = GET_MODE (dest);
19427 vmode = GET_MODE (mask);
19429 if (rtx_equal_p (op0, op1))
19431 /* Shouldn't happen often (it's useless, obviously), but when it does
19432 we'd generate incorrect code if we continue below. */
19433 emit_move_insn (dest, op0);
19434 return;
19437 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
19439 gcc_assert (REGNO (op1) == REGNO (scratch));
19441 x = gen_rtx_AND (vmode, scratch, mask);
19442 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19444 dest = mask;
19445 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19446 x = gen_rtx_NOT (vmode, dest);
19447 x = gen_rtx_AND (vmode, x, op0);
19448 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19450 else
19452 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
19454 x = gen_rtx_AND (vmode, scratch, mask);
19456 else /* alternative 2,4 */
19458 gcc_assert (REGNO (mask) == REGNO (scratch));
19459 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
19460 x = gen_rtx_AND (vmode, scratch, op1);
19462 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19464 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
19466 dest = simplify_gen_subreg (vmode, op0, mode, 0);
19467 x = gen_rtx_AND (vmode, dest, nmask);
19469 else /* alternative 3,4 */
19471 gcc_assert (REGNO (nmask) == REGNO (dest));
19472 dest = nmask;
19473 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19474 x = gen_rtx_AND (vmode, dest, op0);
19476 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19479 x = gen_rtx_IOR (vmode, dest, scratch);
19480 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19483 /* Return TRUE or FALSE depending on whether the first SET in INSN
19484 has source and destination with matching CC modes, and that the
19485 CC mode is at least as constrained as REQ_MODE. */
19487 bool
19488 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
19490 rtx set;
19491 enum machine_mode set_mode;
19493 set = PATTERN (insn);
19494 if (GET_CODE (set) == PARALLEL)
19495 set = XVECEXP (set, 0, 0);
19496 gcc_assert (GET_CODE (set) == SET);
19497 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
19499 set_mode = GET_MODE (SET_DEST (set));
19500 switch (set_mode)
19502 case CCNOmode:
19503 if (req_mode != CCNOmode
19504 && (req_mode != CCmode
19505 || XEXP (SET_SRC (set), 1) != const0_rtx))
19506 return false;
19507 break;
19508 case CCmode:
19509 if (req_mode == CCGCmode)
19510 return false;
19511 /* FALLTHRU */
19512 case CCGCmode:
19513 if (req_mode == CCGOCmode || req_mode == CCNOmode)
19514 return false;
19515 /* FALLTHRU */
19516 case CCGOCmode:
19517 if (req_mode == CCZmode)
19518 return false;
19519 /* FALLTHRU */
19520 case CCZmode:
19521 break;
19523 case CCAmode:
19524 case CCCmode:
19525 case CCOmode:
19526 case CCSmode:
19527 if (set_mode != req_mode)
19528 return false;
19529 break;
19531 default:
19532 gcc_unreachable ();
19535 return GET_MODE (SET_SRC (set)) == set_mode;
19538 /* Generate insn patterns to do an integer compare of OPERANDS. */
19540 static rtx
19541 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
19543 enum machine_mode cmpmode;
19544 rtx tmp, flags;
19546 cmpmode = SELECT_CC_MODE (code, op0, op1);
19547 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
19549 /* This is very simple, but making the interface the same as in the
19550 FP case makes the rest of the code easier. */
19551 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
19552 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
19554 /* Return the test that should be put into the flags user, i.e.
19555 the bcc, scc, or cmov instruction. */
19556 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
19559 /* Figure out whether to use ordered or unordered fp comparisons.
19560 Return the appropriate mode to use. */
19562 enum machine_mode
19563 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
19565 /* ??? In order to make all comparisons reversible, we do all comparisons
19566 non-trapping when compiling for IEEE. Once gcc is able to distinguish
19567 all forms of trapping and nontrapping comparisons, we can make inequality
19568 comparisons trapping again, since it results in better code when using
19569 FCOM based compares. */
19570 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
19573 enum machine_mode
19574 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
19576 enum machine_mode mode = GET_MODE (op0);
19578 if (SCALAR_FLOAT_MODE_P (mode))
19580 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
19581 return ix86_fp_compare_mode (code);
19584 switch (code)
19586 /* Only zero flag is needed. */
19587 case EQ: /* ZF=0 */
19588 case NE: /* ZF!=0 */
19589 return CCZmode;
19590 /* Codes needing carry flag. */
19591 case GEU: /* CF=0 */
19592 case LTU: /* CF=1 */
19593 /* Detect overflow checks. They need just the carry flag. */
19594 if (GET_CODE (op0) == PLUS
19595 && rtx_equal_p (op1, XEXP (op0, 0)))
19596 return CCCmode;
19597 else
19598 return CCmode;
19599 case GTU: /* CF=0 & ZF=0 */
19600 case LEU: /* CF=1 | ZF=1 */
19601 return CCmode;
19602 /* Codes possibly doable only with sign flag when
19603 comparing against zero. */
19604 case GE: /* SF=OF or SF=0 */
19605 case LT: /* SF<>OF or SF=1 */
19606 if (op1 == const0_rtx)
19607 return CCGOCmode;
19608 else
19609 /* For other cases the carry flag is not required. */
19610 return CCGCmode;
19611 /* Codes doable only with the sign flag when comparing
19612 against zero, but for which we lack a jump instruction,
19613 so we need to use relational tests against overflow,
19614 which thus needs to be zero. */
19615 case GT: /* ZF=0 & SF=OF */
19616 case LE: /* ZF=1 | SF<>OF */
19617 if (op1 == const0_rtx)
19618 return CCNOmode;
19619 else
19620 return CCGCmode;
19621 /* The strcmp pattern does (use flags) and combine may ask us for the
19622 proper mode. */
19623 case USE:
19624 return CCmode;
19625 default:
19626 gcc_unreachable ();
19630 /* Return the fixed registers used for condition codes. */
19632 static bool
19633 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
19635 *p1 = FLAGS_REG;
19636 *p2 = FPSR_REG;
19637 return true;
19640 /* If two condition code modes are compatible, return a condition code
19641 mode which is compatible with both. Otherwise, return
19642 VOIDmode. */
19644 static enum machine_mode
19645 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
19647 if (m1 == m2)
19648 return m1;
19650 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
19651 return VOIDmode;
19653 if ((m1 == CCGCmode && m2 == CCGOCmode)
19654 || (m1 == CCGOCmode && m2 == CCGCmode))
19655 return CCGCmode;
19657 if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
19658 return m2;
19659 else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
19660 return m1;
19662 switch (m1)
19664 default:
19665 gcc_unreachable ();
19667 case CCmode:
19668 case CCGCmode:
19669 case CCGOCmode:
19670 case CCNOmode:
19671 case CCAmode:
19672 case CCCmode:
19673 case CCOmode:
19674 case CCSmode:
19675 case CCZmode:
19676 switch (m2)
19678 default:
19679 return VOIDmode;
19681 case CCmode:
19682 case CCGCmode:
19683 case CCGOCmode:
19684 case CCNOmode:
19685 case CCAmode:
19686 case CCCmode:
19687 case CCOmode:
19688 case CCSmode:
19689 case CCZmode:
19690 return CCmode;
19693 case CCFPmode:
19694 case CCFPUmode:
19695 /* These are only compatible with themselves, which we already
19696 checked above. */
19697 return VOIDmode;
19702 /* Return a comparison we can do that is equivalent to
19703 swap_condition (code), apart possibly from orderedness.
19704 But never change orderedness if TARGET_IEEE_FP, returning
19705 UNKNOWN in that case if necessary. */
19707 static enum rtx_code
19708 ix86_fp_swap_condition (enum rtx_code code)
19710 switch (code)
19712 case GT: /* GTU - CF=0 & ZF=0 */
19713 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
19714 case GE: /* GEU - CF=0 */
19715 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
19716 case UNLT: /* LTU - CF=1 */
19717 return TARGET_IEEE_FP ? UNKNOWN : GT;
19718 case UNLE: /* LEU - CF=1 | ZF=1 */
19719 return TARGET_IEEE_FP ? UNKNOWN : GE;
19720 default:
19721 return swap_condition (code);
19725 /* Return the cost of comparison CODE using the best strategy for performance.
19726 All of the following functions use the number of instructions as a cost metric.
19727 In the future this should be tweaked to compute bytes for optimize_size and
19728 take into account the performance of various instructions on various CPUs. */
19730 static int
19731 ix86_fp_comparison_cost (enum rtx_code code)
19733 int arith_cost;
19735 /* The cost of code using bit-twiddling on %ah. */
19736 switch (code)
19738 case UNLE:
19739 case UNLT:
19740 case LTGT:
19741 case GT:
19742 case GE:
19743 case UNORDERED:
19744 case ORDERED:
19745 case UNEQ:
19746 arith_cost = 4;
19747 break;
19748 case LT:
19749 case NE:
19750 case EQ:
19751 case UNGE:
19752 arith_cost = TARGET_IEEE_FP ? 5 : 4;
19753 break;
19754 case LE:
19755 case UNGT:
19756 arith_cost = TARGET_IEEE_FP ? 6 : 4;
19757 break;
19758 default:
19759 gcc_unreachable ();
19762 switch (ix86_fp_comparison_strategy (code))
19764 case IX86_FPCMP_COMI:
19765 return arith_cost > 4 ? 3 : 2;
19766 case IX86_FPCMP_SAHF:
19767 return arith_cost > 4 ? 4 : 3;
19768 default:
19769 return arith_cost;
19773 /* Return the strategy to use for floating-point comparisons. We assume that
19774 fcomi is always preferable where available, since that is also true when
19775 looking at size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
19777 enum ix86_fpcmp_strategy
19778 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
19780 /* Do fcomi/sahf based test when profitable. */
19782 if (TARGET_CMOVE)
19783 return IX86_FPCMP_COMI;
19785 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
19786 return IX86_FPCMP_SAHF;
19788 return IX86_FPCMP_ARITH;
19791 /* Swap, force into registers, or otherwise massage the two operands
19792 to a fp comparison. The operands are updated in place; the new
19793 comparison code is returned. */
19795 static enum rtx_code
19796 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
19798 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
19799 rtx op0 = *pop0, op1 = *pop1;
19800 enum machine_mode op_mode = GET_MODE (op0);
19801 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
19803 /* All of the unordered compare instructions only work on registers.
19804 The same is true of the fcomi compare instructions. The XFmode
19805 compare instructions require registers except when comparing
19806 against zero or when converting operand 1 from fixed point to
19807 floating point. */
19809 if (!is_sse
19810 && (fpcmp_mode == CCFPUmode
19811 || (op_mode == XFmode
19812 && ! (standard_80387_constant_p (op0) == 1
19813 || standard_80387_constant_p (op1) == 1)
19814 && GET_CODE (op1) != FLOAT)
19815 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
19817 op0 = force_reg (op_mode, op0);
19818 op1 = force_reg (op_mode, op1);
19820 else
19822 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
19823 things around if they appear profitable, otherwise force op0
19824 into a register. */
19826 if (standard_80387_constant_p (op0) == 0
19827 || (MEM_P (op0)
19828 && ! (standard_80387_constant_p (op1) == 0
19829 || MEM_P (op1))))
19831 enum rtx_code new_code = ix86_fp_swap_condition (code);
19832 if (new_code != UNKNOWN)
19834 rtx tmp;
19835 tmp = op0, op0 = op1, op1 = tmp;
19836 code = new_code;
19840 if (!REG_P (op0))
19841 op0 = force_reg (op_mode, op0);
19843 if (CONSTANT_P (op1))
19845 int tmp = standard_80387_constant_p (op1);
19846 if (tmp == 0)
19847 op1 = validize_mem (force_const_mem (op_mode, op1));
19848 else if (tmp == 1)
19850 if (TARGET_CMOVE)
19851 op1 = force_reg (op_mode, op1);
19853 else
19854 op1 = force_reg (op_mode, op1);
19858 /* Try to rearrange the comparison to make it cheaper. */
19859 if (ix86_fp_comparison_cost (code)
19860 > ix86_fp_comparison_cost (swap_condition (code))
19861 && (REG_P (op1) || can_create_pseudo_p ()))
19863 rtx tmp;
19864 tmp = op0, op0 = op1, op1 = tmp;
19865 code = swap_condition (code);
19866 if (!REG_P (op0))
19867 op0 = force_reg (op_mode, op0);
19870 *pop0 = op0;
19871 *pop1 = op1;
19872 return code;
19875 /* Convert comparison codes we use to represent FP comparison to integer
19876 code that will result in proper branch. Return UNKNOWN if no such code
19877 is available. */
19879 enum rtx_code
19880 ix86_fp_compare_code_to_integer (enum rtx_code code)
19882 switch (code)
19884 case GT:
19885 return GTU;
19886 case GE:
19887 return GEU;
19888 case ORDERED:
19889 case UNORDERED:
19890 return code;
19891 break;
19892 case UNEQ:
19893 return EQ;
19894 break;
19895 case UNLT:
19896 return LTU;
19897 break;
19898 case UNLE:
19899 return LEU;
19900 break;
19901 case LTGT:
19902 return NE;
19903 break;
19904 default:
19905 return UNKNOWN;
19909 /* Generate insn patterns to do a floating point compare of OPERANDS. */
19911 static rtx
19912 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
19914 enum machine_mode fpcmp_mode, intcmp_mode;
19915 rtx tmp, tmp2;
19917 fpcmp_mode = ix86_fp_compare_mode (code);
19918 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
19920 /* Do fcomi/sahf based test when profitable. */
19921 switch (ix86_fp_comparison_strategy (code))
19923 case IX86_FPCMP_COMI:
19924 intcmp_mode = fpcmp_mode;
19925 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19926 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
19927 tmp);
19928 emit_insn (tmp);
19929 break;
19931 case IX86_FPCMP_SAHF:
19932 intcmp_mode = fpcmp_mode;
19933 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19934 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
19935 tmp);
19937 if (!scratch)
19938 scratch = gen_reg_rtx (HImode);
19939 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
19940 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
19941 break;
19943 case IX86_FPCMP_ARITH:
19944 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
19945 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19946 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
19947 if (!scratch)
19948 scratch = gen_reg_rtx (HImode);
19949 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
19951 /* In the unordered case, we have to check C2 for NaN's, which
19952 doesn't happen to work out to anything nice combination-wise.
19953 So do some bit twiddling on the value we've got in AH to come
19954 up with an appropriate set of condition codes. */
19956 intcmp_mode = CCNOmode;
19957 switch (code)
19959 case GT:
19960 case UNGT:
19961 if (code == GT || !TARGET_IEEE_FP)
19963 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
19964 code = EQ;
19966 else
19968 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19969 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
19970 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
19971 intcmp_mode = CCmode;
19972 code = GEU;
19974 break;
19975 case LT:
19976 case UNLT:
19977 if (code == LT && TARGET_IEEE_FP)
19979 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19980 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
19981 intcmp_mode = CCmode;
19982 code = EQ;
19984 else
19986 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
19987 code = NE;
19989 break;
19990 case GE:
19991 case UNGE:
19992 if (code == GE || !TARGET_IEEE_FP)
19994 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
19995 code = EQ;
19997 else
19999 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20000 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
20001 code = NE;
20003 break;
20004 case LE:
20005 case UNLE:
20006 if (code == LE && TARGET_IEEE_FP)
20008 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20009 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
20010 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
20011 intcmp_mode = CCmode;
20012 code = LTU;
20014 else
20016 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
20017 code = NE;
20019 break;
20020 case EQ:
20021 case UNEQ:
20022 if (code == EQ && TARGET_IEEE_FP)
20024 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20025 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
20026 intcmp_mode = CCmode;
20027 code = EQ;
20029 else
20031 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
20032 code = NE;
20034 break;
20035 case NE:
20036 case LTGT:
20037 if (code == NE && TARGET_IEEE_FP)
20039 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20040 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
20041 GEN_INT (0x40)));
20042 code = NE;
20044 else
20046 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
20047 code = EQ;
20049 break;
20051 case UNORDERED:
20052 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
20053 code = NE;
20054 break;
20055 case ORDERED:
20056 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
20057 code = EQ;
20058 break;
20060 default:
20061 gcc_unreachable ();
20063 break;
20065 default:
20066 gcc_unreachable ();
20069 /* Return the test that should be put into the flags user, i.e.
20070 the bcc, scc, or cmov instruction. */
20071 return gen_rtx_fmt_ee (code, VOIDmode,
20072 gen_rtx_REG (intcmp_mode, FLAGS_REG),
20073 const0_rtx);
20076 static rtx
20077 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
20079 rtx ret;
20081 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
20082 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
20084 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
20086 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
20087 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20089 else
20090 ret = ix86_expand_int_compare (code, op0, op1);
20092 return ret;
20095 void
20096 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
20098 enum machine_mode mode = GET_MODE (op0);
20099 rtx tmp;
20101 switch (mode)
20103 case SFmode:
20104 case DFmode:
20105 case XFmode:
20106 case QImode:
20107 case HImode:
20108 case SImode:
20109 simple:
20110 tmp = ix86_expand_compare (code, op0, op1);
20111 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
20112 gen_rtx_LABEL_REF (VOIDmode, label),
20113 pc_rtx);
20114 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
20115 return;
20117 case DImode:
20118 if (TARGET_64BIT)
20119 goto simple;
20120 case TImode:
20121 /* Expand DImode branch into multiple compare+branch. */
20123 rtx lo[2], hi[2], label2;
20124 enum rtx_code code1, code2, code3;
20125 enum machine_mode submode;
20127 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
20129 tmp = op0, op0 = op1, op1 = tmp;
20130 code = swap_condition (code);
20133 split_double_mode (mode, &op0, 1, lo+0, hi+0);
20134 split_double_mode (mode, &op1, 1, lo+1, hi+1);
20136 submode = mode == DImode ? SImode : DImode;
20138 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
20139 avoid two branches. This costs one extra insn, so disable when
20140 optimizing for size. */
20142 if ((code == EQ || code == NE)
20143 && (!optimize_insn_for_size_p ()
20144 || hi[1] == const0_rtx || lo[1] == const0_rtx))
20146 rtx xor0, xor1;
20148 xor1 = hi[0];
20149 if (hi[1] != const0_rtx)
20150 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
20151 NULL_RTX, 0, OPTAB_WIDEN);
20153 xor0 = lo[0];
20154 if (lo[1] != const0_rtx)
20155 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
20156 NULL_RTX, 0, OPTAB_WIDEN);
20158 tmp = expand_binop (submode, ior_optab, xor1, xor0,
20159 NULL_RTX, 0, OPTAB_WIDEN);
20161 ix86_expand_branch (code, tmp, const0_rtx, label);
20162 return;
20165 /* Otherwise, if we are doing a less-than or greater-or-equal comparison,
20166 op1 is a constant and the low word is zero, then we can just
20167 examine the high word. Similarly for a low word of -1 and
20168 less-or-equal or greater-than. */
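/* E.g. (illustrative): comparing a < b where b is the constant 0x500000000,
   lo(b) == 0, so a < b holds exactly when hi(a) < 5 and a single high-word
   compare and branch suffices.  */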
20170 if (CONST_INT_P (hi[1]))
20171 switch (code)
20173 case LT: case LTU: case GE: case GEU:
20174 if (lo[1] == const0_rtx)
20176 ix86_expand_branch (code, hi[0], hi[1], label);
20177 return;
20179 break;
20180 case LE: case LEU: case GT: case GTU:
20181 if (lo[1] == constm1_rtx)
20183 ix86_expand_branch (code, hi[0], hi[1], label);
20184 return;
20186 break;
20187 default:
20188 break;
20191 /* Otherwise, we need two or three jumps. */
20193 label2 = gen_label_rtx ();
20195 code1 = code;
20196 code2 = swap_condition (code);
20197 code3 = unsigned_condition (code);
20199 switch (code)
20201 case LT: case GT: case LTU: case GTU:
20202 break;
20204 case LE: code1 = LT; code2 = GT; break;
20205 case GE: code1 = GT; code2 = LT; break;
20206 case LEU: code1 = LTU; code2 = GTU; break;
20207 case GEU: code1 = GTU; code2 = LTU; break;
20209 case EQ: code1 = UNKNOWN; code2 = NE; break;
20210 case NE: code2 = UNKNOWN; break;
20212 default:
20213 gcc_unreachable ();
20217 * a < b =>
20218 * if (hi(a) < hi(b)) goto true;
20219 * if (hi(a) > hi(b)) goto false;
20220 * if (lo(a) < lo(b)) goto true;
20221 * false:
20224 if (code1 != UNKNOWN)
20225 ix86_expand_branch (code1, hi[0], hi[1], label);
20226 if (code2 != UNKNOWN)
20227 ix86_expand_branch (code2, hi[0], hi[1], label2);
20229 ix86_expand_branch (code3, lo[0], lo[1], label);
20231 if (code2 != UNKNOWN)
20232 emit_label (label2);
20233 return;
20236 default:
20237 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
20238 goto simple;
20242 /* Split branch based on floating point condition. */
20243 void
20244 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
20245 rtx target1, rtx target2, rtx tmp)
20247 rtx condition;
20248 rtx i;
20250 if (target2 != pc_rtx)
20252 rtx tmp = target2;
20253 code = reverse_condition_maybe_unordered (code);
20254 target2 = target1;
20255 target1 = tmp;
20258 condition = ix86_expand_fp_compare (code, op1, op2,
20259 tmp);
20261 i = emit_jump_insn (gen_rtx_SET
20262 (VOIDmode, pc_rtx,
20263 gen_rtx_IF_THEN_ELSE (VOIDmode,
20264 condition, target1, target2)));
20265 if (split_branch_probability >= 0)
20266 add_int_reg_note (i, REG_BR_PROB, split_branch_probability);
20269 void
20270 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
20272 rtx ret;
20274 gcc_assert (GET_MODE (dest) == QImode);
20276 ret = ix86_expand_compare (code, op0, op1);
20277 PUT_MODE (ret, QImode);
20278 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
20281 /* Expand comparison setting or clearing carry flag. Return true when
20282 successful and set pop for the operation. */
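/* Illustrative example: a == 0 is rewritten below as (unsigned) a < 1, so a
   single cmp $1, a leaves the answer in the carry flag and the caller can
   consume it with sbb/adc rather than a setcc.  */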
20283 static bool
20284 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
20286 enum machine_mode mode =
20287 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
20289 /* Do not handle double-mode compares that go through the special path. */
20290 if (mode == (TARGET_64BIT ? TImode : DImode))
20291 return false;
20293 if (SCALAR_FLOAT_MODE_P (mode))
20295 rtx compare_op, compare_seq;
20297 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
20299 /* Shortcut: the following common codes never translate
20300 into carry flag compares. */
20301 if (code == EQ || code == NE || code == UNEQ || code == LTGT
20302 || code == ORDERED || code == UNORDERED)
20303 return false;
20305 /* These comparisons require the zero flag; swap the operands so they won't. */
20306 if ((code == GT || code == UNLE || code == LE || code == UNGT)
20307 && !TARGET_IEEE_FP)
20309 rtx tmp = op0;
20310 op0 = op1;
20311 op1 = tmp;
20312 code = swap_condition (code);
20315 /* Try to expand the comparison and verify that we end up with
20316 a carry flag based comparison. This fails to be true only when
20317 we decide to expand the comparison using arithmetic, which is
20318 not a common scenario. */
20319 start_sequence ();
20320 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20321 compare_seq = get_insns ();
20322 end_sequence ();
20324 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
20325 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
20326 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
20327 else
20328 code = GET_CODE (compare_op);
20330 if (code != LTU && code != GEU)
20331 return false;
20333 emit_insn (compare_seq);
20334 *pop = compare_op;
20335 return true;
20338 if (!INTEGRAL_MODE_P (mode))
20339 return false;
20341 switch (code)
20343 case LTU:
20344 case GEU:
20345 break;
20347 /* Convert a==0 into (unsigned)a<1. */
20348 case EQ:
20349 case NE:
20350 if (op1 != const0_rtx)
20351 return false;
20352 op1 = const1_rtx;
20353 code = (code == EQ ? LTU : GEU);
20354 break;
20356 /* Convert a>b into b<a or a>=b-1. */
20357 case GTU:
20358 case LEU:
20359 if (CONST_INT_P (op1))
20361 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
20362 /* Bail out on overflow. We still can swap operands but that
20363 would force loading of the constant into register. */
20364 if (op1 == const0_rtx
20365 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
20366 return false;
20367 code = (code == GTU ? GEU : LTU);
20369 else
20371 rtx tmp = op1;
20372 op1 = op0;
20373 op0 = tmp;
20374 code = (code == GTU ? LTU : GEU);
20376 break;
20378 /* Convert a>=0 into (unsigned)a<0x80000000. */
20379 case LT:
20380 case GE:
20381 if (mode == DImode || op1 != const0_rtx)
20382 return false;
20383 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20384 code = (code == LT ? GEU : LTU);
20385 break;
20386 case LE:
20387 case GT:
20388 if (mode == DImode || op1 != constm1_rtx)
20389 return false;
20390 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20391 code = (code == LE ? GEU : LTU);
20392 break;
20394 default:
20395 return false;
20397 /* Swapping operands may cause a constant to appear as the first operand. */
20398 if (!nonimmediate_operand (op0, VOIDmode))
20400 if (!can_create_pseudo_p ())
20401 return false;
20402 op0 = force_reg (mode, op0);
20404 *pop = ix86_expand_compare (code, op0, op1);
20405 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
20406 return true;
20409 bool
20410 ix86_expand_int_movcc (rtx operands[])
20412 enum rtx_code code = GET_CODE (operands[1]), compare_code;
20413 rtx compare_seq, compare_op;
20414 enum machine_mode mode = GET_MODE (operands[0]);
20415 bool sign_bit_compare_p = false;
20416 rtx op0 = XEXP (operands[1], 0);
20417 rtx op1 = XEXP (operands[1], 1);
20419 if (GET_MODE (op0) == TImode
20420 || (GET_MODE (op0) == DImode
20421 && !TARGET_64BIT))
20422 return false;
20424 start_sequence ();
20425 compare_op = ix86_expand_compare (code, op0, op1);
20426 compare_seq = get_insns ();
20427 end_sequence ();
20429 compare_code = GET_CODE (compare_op);
20431 if ((op1 == const0_rtx && (code == GE || code == LT))
20432 || (op1 == constm1_rtx && (code == GT || code == LE)))
20433 sign_bit_compare_p = true;
20435 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
20436 HImode insns, we'd be swallowed in word prefix ops. */
20438 if ((mode != HImode || TARGET_FAST_PREFIX)
20439 && (mode != (TARGET_64BIT ? TImode : DImode))
20440 && CONST_INT_P (operands[2])
20441 && CONST_INT_P (operands[3]))
20443 rtx out = operands[0];
20444 HOST_WIDE_INT ct = INTVAL (operands[2]);
20445 HOST_WIDE_INT cf = INTVAL (operands[3]);
20446 HOST_WIDE_INT diff;
20448 diff = ct - cf;
20449 /* Sign bit compares are better done using shifts than by using
20450 sbb. */
20451 if (sign_bit_compare_p
20452 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
20454 /* Detect overlap between destination and compare sources. */
20455 rtx tmp = out;
20457 if (!sign_bit_compare_p)
20459 rtx flags;
20460 bool fpcmp = false;
20462 compare_code = GET_CODE (compare_op);
20464 flags = XEXP (compare_op, 0);
20466 if (GET_MODE (flags) == CCFPmode
20467 || GET_MODE (flags) == CCFPUmode)
20469 fpcmp = true;
20470 compare_code
20471 = ix86_fp_compare_code_to_integer (compare_code);
20474 /* To simplify rest of code, restrict to the GEU case. */
20475 if (compare_code == LTU)
20477 HOST_WIDE_INT tmp = ct;
20478 ct = cf;
20479 cf = tmp;
20480 compare_code = reverse_condition (compare_code);
20481 code = reverse_condition (code);
20483 else
20485 if (fpcmp)
20486 PUT_CODE (compare_op,
20487 reverse_condition_maybe_unordered
20488 (GET_CODE (compare_op)));
20489 else
20490 PUT_CODE (compare_op,
20491 reverse_condition (GET_CODE (compare_op)));
20493 diff = ct - cf;
20495 if (reg_overlap_mentioned_p (out, op0)
20496 || reg_overlap_mentioned_p (out, op1))
20497 tmp = gen_reg_rtx (mode);
20499 if (mode == DImode)
20500 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
20501 else
20502 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
20503 flags, compare_op));
20505 else
20507 if (code == GT || code == GE)
20508 code = reverse_condition (code);
20509 else
20511 HOST_WIDE_INT tmp = ct;
20512 ct = cf;
20513 cf = tmp;
20514 diff = ct - cf;
20516 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
20519 if (diff == 1)
20522 * cmpl op0,op1
20523 * sbbl dest,dest
20524 * [addl dest, ct]
20526 * Size 5 - 8.
20528 if (ct)
20529 tmp = expand_simple_binop (mode, PLUS,
20530 tmp, GEN_INT (ct),
20531 copy_rtx (tmp), 1, OPTAB_DIRECT);
20533 else if (cf == -1)
20536 * cmpl op0,op1
20537 * sbbl dest,dest
20538 * orl $ct, dest
20540 * Size 8.
20542 tmp = expand_simple_binop (mode, IOR,
20543 tmp, GEN_INT (ct),
20544 copy_rtx (tmp), 1, OPTAB_DIRECT);
20546 else if (diff == -1 && ct)
20549 * cmpl op0,op1
20550 * sbbl dest,dest
20551 * notl dest
20552 * [addl dest, cf]
20554 * Size 8 - 11.
20556 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20557 if (cf)
20558 tmp = expand_simple_binop (mode, PLUS,
20559 copy_rtx (tmp), GEN_INT (cf),
20560 copy_rtx (tmp), 1, OPTAB_DIRECT);
20562 else
20565 * cmpl op0,op1
20566 * sbbl dest,dest
20567 * [notl dest]
20568 * andl cf - ct, dest
20569 * [addl dest, ct]
20571 * Size 8 - 11.
20574 if (cf == 0)
20576 cf = ct;
20577 ct = 0;
20578 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20581 tmp = expand_simple_binop (mode, AND,
20582 copy_rtx (tmp),
20583 gen_int_mode (cf - ct, mode),
20584 copy_rtx (tmp), 1, OPTAB_DIRECT);
20585 if (ct)
20586 tmp = expand_simple_binop (mode, PLUS,
20587 copy_rtx (tmp), GEN_INT (ct),
20588 copy_rtx (tmp), 1, OPTAB_DIRECT);
20591 if (!rtx_equal_p (tmp, out))
20592 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
20594 return true;
20597 if (diff < 0)
20599 enum machine_mode cmp_mode = GET_MODE (op0);
20601 HOST_WIDE_INT tmp;
20602 tmp = ct, ct = cf, cf = tmp;
20603 diff = -diff;
20605 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20607 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20609 /* We may be reversing an unordered compare to a normal compare, which
20610 is not valid in general (we may convert a non-trapping condition
20611 into a trapping one); however, on i386 we currently emit all
20612 comparisons unordered. */
20613 compare_code = reverse_condition_maybe_unordered (compare_code);
20614 code = reverse_condition_maybe_unordered (code);
20616 else
20618 compare_code = reverse_condition (compare_code);
20619 code = reverse_condition (code);
20623 compare_code = UNKNOWN;
20624 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
20625 && CONST_INT_P (op1))
20627 if (op1 == const0_rtx
20628 && (code == LT || code == GE))
20629 compare_code = code;
20630 else if (op1 == constm1_rtx)
20632 if (code == LE)
20633 compare_code = LT;
20634 else if (code == GT)
20635 compare_code = GE;
20639 /* Optimize dest = (op0 < 0) ? -1 : cf. */
20640 if (compare_code != UNKNOWN
20641 && GET_MODE (op0) == GET_MODE (out)
20642 && (cf == -1 || ct == -1))
20644 /* If lea code below could be used, only optimize
20645 if it results in a 2 insn sequence. */
20647 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
20648 || diff == 3 || diff == 5 || diff == 9)
20649 || (compare_code == LT && ct == -1)
20650 || (compare_code == GE && cf == -1))
20653 * notl op1 (if necessary)
20654 * sarl $31, op1
20655 * orl cf, op1
20657 if (ct != -1)
20659 cf = ct;
20660 ct = -1;
20661 code = reverse_condition (code);
20664 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
20666 out = expand_simple_binop (mode, IOR,
20667 out, GEN_INT (cf),
20668 out, 1, OPTAB_DIRECT);
20669 if (out != operands[0])
20670 emit_move_insn (operands[0], out);
20672 return true;
20677 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
20678 || diff == 3 || diff == 5 || diff == 9)
20679 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
20680 && (mode != DImode
20681 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
20684 * xorl dest,dest
20685 * cmpl op1,op2
20686 * setcc dest
20687 * lea cf(dest*(ct-cf)),dest
20689 * Size 14.
20691 * This also catches the degenerate setcc-only case.
20694 rtx tmp;
20695 int nops;
20697 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
20699 nops = 0;
20700 /* On x86_64 the lea instruction operates on Pmode, so we need
20701 to get the arithmetic done in the proper mode to match. */
20702 if (diff == 1)
20703 tmp = copy_rtx (out);
20704 else
20706 rtx out1;
20707 out1 = copy_rtx (out);
20708 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
20709 nops++;
20710 if (diff & 1)
20712 tmp = gen_rtx_PLUS (mode, tmp, out1);
20713 nops++;
20716 if (cf != 0)
20718 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
20719 nops++;
20721 if (!rtx_equal_p (tmp, out))
20723 if (nops == 1)
20724 out = force_operand (tmp, copy_rtx (out));
20725 else
20726 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
20728 if (!rtx_equal_p (out, operands[0]))
20729 emit_move_insn (operands[0], copy_rtx (out));
20731 return true;
20735 * General case: Jumpful:
20736 * xorl dest,dest cmpl op1, op2
20737 * cmpl op1, op2 movl ct, dest
20738 * setcc dest jcc 1f
20739 * decl dest movl cf, dest
20740 * andl (cf-ct),dest 1:
20741 * addl ct,dest
20743 * Size 20. Size 14.
20745 * This is reasonably steep, but branch mispredict costs are
20746 * high on modern cpus, so consider failing only if optimizing
20747 * for space.
20750 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
20751 && BRANCH_COST (optimize_insn_for_speed_p (),
20752 false) >= 2)
20754 if (cf == 0)
20756 enum machine_mode cmp_mode = GET_MODE (op0);
20758 cf = ct;
20759 ct = 0;
20761 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20763 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20765 /* We may be reversing an unordered compare to a normal compare,
20766 which is not valid in general (we may convert a non-trapping
20767 condition into a trapping one); however, on i386 we currently
20768 emit all comparisons unordered. */
20769 code = reverse_condition_maybe_unordered (code);
20771 else
20773 code = reverse_condition (code);
20774 if (compare_code != UNKNOWN)
20775 compare_code = reverse_condition (compare_code);
20779 if (compare_code != UNKNOWN)
20781 /* notl op1 (if needed)
20782 sarl $31, op1
20783 andl (cf-ct), op1
20784 addl ct, op1
20786 For x < 0 (resp. x <= -1) there will be no notl,
20787 so if possible swap the constants to get rid of the
20788 complement.
20789 True/false will be -1/0 while code below (store flag
20790 followed by decrement) is 0/-1, so the constants need
20791 to be exchanged once more. */
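/* Worked example (illustrative): dest = (x < 0) ? 7 : 3.  After the
   constants are swapped (cf = 7, ct = 3) the emitted sequence is
     sarl $31, dest   -> -1 when x < 0, 0 otherwise
     andl $4,  dest   (cf - ct)
     addl $3,  dest   (ct)
   giving 7 when x < 0 and 3 otherwise.  */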
20793 if (compare_code == GE || !cf)
20795 code = reverse_condition (code);
20796 compare_code = LT;
20798 else
20800 HOST_WIDE_INT tmp = cf;
20801 cf = ct;
20802 ct = tmp;
20805 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
20807 else
20809 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
20811 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
20812 constm1_rtx,
20813 copy_rtx (out), 1, OPTAB_DIRECT);
20816 out = expand_simple_binop (mode, AND, copy_rtx (out),
20817 gen_int_mode (cf - ct, mode),
20818 copy_rtx (out), 1, OPTAB_DIRECT);
20819 if (ct)
20820 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
20821 copy_rtx (out), 1, OPTAB_DIRECT);
20822 if (!rtx_equal_p (out, operands[0]))
20823 emit_move_insn (operands[0], copy_rtx (out));
20825 return true;
20829 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
20831 /* Try a few more things with specific constants and a variable. */
20833 optab op;
20834 rtx var, orig_out, out, tmp;
20836 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
20837 return false;
20839 /* If one of the two operands is an interesting constant, load that
20840 constant via a recursive call below and mask in the variable with a logical operation. */
20842 if (CONST_INT_P (operands[2]))
20844 var = operands[3];
20845 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
20846 operands[3] = constm1_rtx, op = and_optab;
20847 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
20848 operands[3] = const0_rtx, op = ior_optab;
20849 else
20850 return false;
20852 else if (CONST_INT_P (operands[3]))
20854 var = operands[2];
20855 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
20856 operands[2] = constm1_rtx, op = and_optab;
20857 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
20858 operands[2] = const0_rtx, op = ior_optab;
20859 else
20860 return false;
20862 else
20863 return false;
20865 orig_out = operands[0];
20866 tmp = gen_reg_rtx (mode);
20867 operands[0] = tmp;
20869 /* Recurse to get the constant loaded. */
20870 if (ix86_expand_int_movcc (operands) == 0)
20871 return false;
20873 /* Mask in the interesting variable. */
20874 out = expand_binop (mode, op, var, tmp, orig_out, 0,
20875 OPTAB_WIDEN);
20876 if (!rtx_equal_p (out, orig_out))
20877 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
20879 return true;
20883 * For comparison with above,
20885 * movl cf,dest
20886 * movl ct,tmp
20887 * cmpl op1,op2
20888 * cmovcc tmp,dest
20890 * Size 15.
20893 if (! nonimmediate_operand (operands[2], mode))
20894 operands[2] = force_reg (mode, operands[2]);
20895 if (! nonimmediate_operand (operands[3], mode))
20896 operands[3] = force_reg (mode, operands[3]);
20898 if (! register_operand (operands[2], VOIDmode)
20899 && (mode == QImode
20900 || ! register_operand (operands[3], VOIDmode)))
20901 operands[2] = force_reg (mode, operands[2]);
20903 if (mode == QImode
20904 && ! register_operand (operands[3], VOIDmode))
20905 operands[3] = force_reg (mode, operands[3]);
20907 emit_insn (compare_seq);
20908 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
20909 gen_rtx_IF_THEN_ELSE (mode,
20910 compare_op, operands[2],
20911 operands[3])));
20912 return true;
20915 /* Swap, force into registers, or otherwise massage the two operands
20916 to an sse comparison with a mask result. Thus we differ a bit from
20917 ix86_prepare_fp_compare_args which expects to produce a flags result.
20919 The DEST operand exists to help determine whether to commute commutative
20920 operators. The POP0/POP1 operands are updated in place. The new
20921 comparison code is returned, or UNKNOWN if not implementable. */
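/* For instance (illustrative note): a GT comparison a > b cannot be emitted
   directly before AVX, since GT is not among the directly supported
   predicates (LT, LE, UNGT, UNGE, plus the equality and ordered/unordered
   tests), so the operands are swapped and the code becomes LT, i.e. b < a.  */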
20923 static enum rtx_code
20924 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
20925 rtx *pop0, rtx *pop1)
20927 rtx tmp;
20929 switch (code)
20931 case LTGT:
20932 case UNEQ:
20933 /* AVX supports all the needed comparisons. */
20934 if (TARGET_AVX)
20935 break;
20936 /* We have no LTGT as an operator. We could implement it with
20937 NE & ORDERED, but this requires an extra temporary. It's
20938 not clear that it's worth it. */
20939 return UNKNOWN;
20941 case LT:
20942 case LE:
20943 case UNGT:
20944 case UNGE:
20945 /* These are supported directly. */
20946 break;
20948 case EQ:
20949 case NE:
20950 case UNORDERED:
20951 case ORDERED:
20952 /* AVX has 3 operand comparisons, no need to swap anything. */
20953 if (TARGET_AVX)
20954 break;
20955 /* For commutative operators, try to canonicalize the destination
20956 operand to be first in the comparison - this helps reload to
20957 avoid extra moves. */
20958 if (!dest || !rtx_equal_p (dest, *pop1))
20959 break;
20960 /* FALLTHRU */
20962 case GE:
20963 case GT:
20964 case UNLE:
20965 case UNLT:
20966 /* These are not supported directly before AVX, and furthermore
20967 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
20968 comparison operands to transform into something that is
20969 supported. */
20970 tmp = *pop0;
20971 *pop0 = *pop1;
20972 *pop1 = tmp;
20973 code = swap_condition (code);
20974 break;
20976 default:
20977 gcc_unreachable ();
20980 return code;
20983 /* Detect conditional moves that exactly match min/max operational
20984 semantics. Note that this is IEEE safe, as long as we don't
20985 interchange the operands.
20987 Returns FALSE if this conditional move doesn't match a MIN/MAX,
20988 and TRUE if the operation is successful and instructions are emitted. */
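/* Illustrative note on the IEEE issue: x < y ? x : y maps onto min{ss,ps},
   whose result for unordered inputs is the second source operand - the same
   value the C expression yields when the comparison is false - which is why
   the operands must not be interchanged.  */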
20990 static bool
20991 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
20992 rtx cmp_op1, rtx if_true, rtx if_false)
20994 enum machine_mode mode;
20995 bool is_min;
20996 rtx tmp;
20998 if (code == LT)
21000 else if (code == UNGE)
21002 tmp = if_true;
21003 if_true = if_false;
21004 if_false = tmp;
21006 else
21007 return false;
21009 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
21010 is_min = true;
21011 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
21012 is_min = false;
21013 else
21014 return false;
21016 mode = GET_MODE (dest);
21018 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
21019 but MODE may be a vector mode and thus not appropriate. */
21020 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
21022 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
21023 rtvec v;
21025 if_true = force_reg (mode, if_true);
21026 v = gen_rtvec (2, if_true, if_false);
21027 tmp = gen_rtx_UNSPEC (mode, v, u);
21029 else
21031 code = is_min ? SMIN : SMAX;
21032 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
21035 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
21036 return true;
21039 /* Expand an sse vector comparison. Return the register with the result. */
21041 static rtx
21042 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
21043 rtx op_true, rtx op_false)
21045 enum machine_mode mode = GET_MODE (dest);
21046 enum machine_mode cmp_ops_mode = GET_MODE (cmp_op0);
21048 /* In the general case the result of the comparison can differ in mode from the operands. */
21049 enum machine_mode cmp_mode;
21051 /* In AVX512F the result of comparison is an integer mask. */
21052 bool maskcmp = false;
21053 rtx x;
21055 if (GET_MODE_SIZE (cmp_ops_mode) == 64)
21057 cmp_mode = mode_for_size (GET_MODE_NUNITS (cmp_ops_mode), MODE_INT, 0);
21058 gcc_assert (cmp_mode != BLKmode);
21060 maskcmp = true;
21062 else
21063 cmp_mode = cmp_ops_mode;
21066 cmp_op0 = force_reg (cmp_ops_mode, cmp_op0);
21067 if (!nonimmediate_operand (cmp_op1, cmp_ops_mode))
21068 cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
21070 if (optimize
21071 || reg_overlap_mentioned_p (dest, op_true)
21072 || reg_overlap_mentioned_p (dest, op_false))
21073 dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
21075 /* Compare patterns for int modes are unspec in AVX512F only. */
21076 if (maskcmp && (code == GT || code == EQ))
21078 rtx (*gen)(rtx, rtx, rtx);
21080 switch (cmp_ops_mode)
21082 case V16SImode:
21083 gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
21084 break;
21085 case V8DImode:
21086 gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
21087 break;
21088 default:
21089 gen = NULL;
21092 if (gen)
21094 emit_insn (gen (dest, cmp_op0, cmp_op1));
21095 return dest;
21098 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
21100 if (cmp_mode != mode && !maskcmp)
21102 x = force_reg (cmp_ops_mode, x);
21103 convert_move (dest, x, false);
21105 else
21106 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21108 return dest;
21111 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
21112 operations. This is used for both scalar and vector conditional moves. */
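/* The fallback at the end of this function is the classic mask blend
   dest = (cmp & op_true) | (~cmp & op_false), which relies on each element
   of CMP being all-ones or all-zeros.  (Illustrative summary of the code
   below.)  */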
21114 static void
21115 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
21117 enum machine_mode mode = GET_MODE (dest);
21118 enum machine_mode cmpmode = GET_MODE (cmp);
21120 /* In AVX512F the result of comparison is an integer mask. */
21121 bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
21123 rtx t2, t3, x;
21125 if (vector_all_ones_operand (op_true, mode)
21126 && rtx_equal_p (op_false, CONST0_RTX (mode))
21127 && !maskcmp)
21129 emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
21131 else if (op_false == CONST0_RTX (mode)
21132 && !maskcmp)
21134 op_true = force_reg (mode, op_true);
21135 x = gen_rtx_AND (mode, cmp, op_true);
21136 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21138 else if (op_true == CONST0_RTX (mode)
21139 && !maskcmp)
21141 op_false = force_reg (mode, op_false);
21142 x = gen_rtx_NOT (mode, cmp);
21143 x = gen_rtx_AND (mode, x, op_false);
21144 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21146 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)
21147 && !maskcmp)
21149 op_false = force_reg (mode, op_false);
21150 x = gen_rtx_IOR (mode, cmp, op_false);
21151 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21153 else if (TARGET_XOP
21154 && !maskcmp)
21156 op_true = force_reg (mode, op_true);
21158 if (!nonimmediate_operand (op_false, mode))
21159 op_false = force_reg (mode, op_false);
21161 emit_insn (gen_rtx_SET (mode, dest,
21162 gen_rtx_IF_THEN_ELSE (mode, cmp,
21163 op_true,
21164 op_false)));
21166 else
21168 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
21169 rtx d = dest;
21171 if (!nonimmediate_operand (op_true, mode))
21172 op_true = force_reg (mode, op_true);
21174 op_false = force_reg (mode, op_false);
21176 switch (mode)
21178 case V4SFmode:
21179 if (TARGET_SSE4_1)
21180 gen = gen_sse4_1_blendvps;
21181 break;
21182 case V2DFmode:
21183 if (TARGET_SSE4_1)
21184 gen = gen_sse4_1_blendvpd;
21185 break;
21186 case V16QImode:
21187 case V8HImode:
21188 case V4SImode:
21189 case V2DImode:
21190 if (TARGET_SSE4_1)
21192 gen = gen_sse4_1_pblendvb;
21193 if (mode != V16QImode)
21194 d = gen_reg_rtx (V16QImode);
21195 op_false = gen_lowpart (V16QImode, op_false);
21196 op_true = gen_lowpart (V16QImode, op_true);
21197 cmp = gen_lowpart (V16QImode, cmp);
21199 break;
21200 case V8SFmode:
21201 if (TARGET_AVX)
21202 gen = gen_avx_blendvps256;
21203 break;
21204 case V4DFmode:
21205 if (TARGET_AVX)
21206 gen = gen_avx_blendvpd256;
21207 break;
21208 case V32QImode:
21209 case V16HImode:
21210 case V8SImode:
21211 case V4DImode:
21212 if (TARGET_AVX2)
21214 gen = gen_avx2_pblendvb;
21215 if (mode != V32QImode)
21216 d = gen_reg_rtx (V32QImode);
21217 op_false = gen_lowpart (V32QImode, op_false);
21218 op_true = gen_lowpart (V32QImode, op_true);
21219 cmp = gen_lowpart (V32QImode, cmp);
21221 break;
21223 case V16SImode:
21224 gen = gen_avx512f_blendmv16si;
21225 break;
21226 case V8DImode:
21227 gen = gen_avx512f_blendmv8di;
21228 break;
21229 case V8DFmode:
21230 gen = gen_avx512f_blendmv8df;
21231 break;
21232 case V16SFmode:
21233 gen = gen_avx512f_blendmv16sf;
21234 break;
21236 default:
21237 break;
21240 if (gen != NULL)
21242 emit_insn (gen (d, op_false, op_true, cmp));
21243 if (d != dest)
21244 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
21246 else
21248 op_true = force_reg (mode, op_true);
21250 t2 = gen_reg_rtx (mode);
21251 if (optimize)
21252 t3 = gen_reg_rtx (mode);
21253 else
21254 t3 = dest;
21256 x = gen_rtx_AND (mode, op_true, cmp);
21257 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
21259 x = gen_rtx_NOT (mode, cmp);
21260 x = gen_rtx_AND (mode, x, op_false);
21261 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
21263 x = gen_rtx_IOR (mode, t3, t2);
21264 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21269 /* Expand a floating-point conditional move. Return true if successful. */
21271 bool
21272 ix86_expand_fp_movcc (rtx operands[])
21274 enum machine_mode mode = GET_MODE (operands[0]);
21275 enum rtx_code code = GET_CODE (operands[1]);
21276 rtx tmp, compare_op;
21277 rtx op0 = XEXP (operands[1], 0);
21278 rtx op1 = XEXP (operands[1], 1);
21280 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
21282 enum machine_mode cmode;
21284 /* Since we've no cmove for sse registers, don't force bad register
21285 allocation just to gain access to it. Deny movcc when the
21286 comparison mode doesn't match the move mode. */
21287 cmode = GET_MODE (op0);
21288 if (cmode == VOIDmode)
21289 cmode = GET_MODE (op1);
21290 if (cmode != mode)
21291 return false;
21293 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
21294 if (code == UNKNOWN)
21295 return false;
21297 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
21298 operands[2], operands[3]))
21299 return true;
21301 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
21302 operands[2], operands[3]);
21303 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
21304 return true;
21307 if (GET_MODE (op0) == TImode
21308 || (GET_MODE (op0) == DImode
21309 && !TARGET_64BIT))
21310 return false;
21312 /* The floating point conditional move instructions don't directly
21313 support conditions resulting from a signed integer comparison. */
21315 compare_op = ix86_expand_compare (code, op0, op1);
21316 if (!fcmov_comparison_operator (compare_op, VOIDmode))
21318 tmp = gen_reg_rtx (QImode);
21319 ix86_expand_setcc (tmp, code, op0, op1);
21321 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
21324 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
21325 gen_rtx_IF_THEN_ELSE (mode, compare_op,
21326 operands[2], operands[3])));
21328 return true;
21331 /* Expand a floating-point vector conditional move; a vcond operation
21332 rather than a movcc operation. */
21334 bool
21335 ix86_expand_fp_vcond (rtx operands[])
21337 enum rtx_code code = GET_CODE (operands[3]);
21338 rtx cmp;
21340 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
21341 &operands[4], &operands[5]);
21342 if (code == UNKNOWN)
21344 rtx temp;
21345 switch (GET_CODE (operands[3]))
21347 case LTGT:
21348 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
21349 operands[5], operands[0], operands[0]);
21350 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
21351 operands[5], operands[1], operands[2]);
21352 code = AND;
21353 break;
21354 case UNEQ:
21355 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
21356 operands[5], operands[0], operands[0]);
21357 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
21358 operands[5], operands[1], operands[2]);
21359 code = IOR;
21360 break;
21361 default:
21362 gcc_unreachable ();
21364 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
21365 OPTAB_DIRECT);
21366 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21367 return true;
21370 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
21371 operands[5], operands[1], operands[2]))
21372 return true;
21374 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
21375 operands[1], operands[2]);
21376 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21377 return true;
21380 /* Expand a signed/unsigned integral vector conditional move. */
21382 bool
21383 ix86_expand_int_vcond (rtx operands[])
21385 enum machine_mode data_mode = GET_MODE (operands[0]);
21386 enum machine_mode mode = GET_MODE (operands[4]);
21387 enum rtx_code code = GET_CODE (operands[3]);
21388 bool negate = false;
21389 rtx x, cop0, cop1;
21391 cop0 = operands[4];
21392 cop1 = operands[5];
21394 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
21395 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
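/* E.g. for V4SImode (illustrative), x < 0 ? -1 : 0 becomes an arithmetic
   shift "psrad $31" and x < 0 ? 1 : 0 a logical shift "psrld $31".  */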
21396 if ((code == LT || code == GE)
21397 && data_mode == mode
21398 && cop1 == CONST0_RTX (mode)
21399 && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
21400 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
21401 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
21402 && (GET_MODE_SIZE (data_mode) == 16
21403 || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
21405 rtx negop = operands[2 - (code == LT)];
21406 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
21407 if (negop == CONST1_RTX (data_mode))
21409 rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
21410 operands[0], 1, OPTAB_DIRECT);
21411 if (res != operands[0])
21412 emit_move_insn (operands[0], res);
21413 return true;
21415 else if (GET_MODE_INNER (data_mode) != DImode
21416 && vector_all_ones_operand (negop, data_mode))
21418 rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
21419 operands[0], 0, OPTAB_DIRECT);
21420 if (res != operands[0])
21421 emit_move_insn (operands[0], res);
21422 return true;
21426 if (!nonimmediate_operand (cop1, mode))
21427 cop1 = force_reg (mode, cop1);
21428 if (!general_operand (operands[1], data_mode))
21429 operands[1] = force_reg (data_mode, operands[1]);
21430 if (!general_operand (operands[2], data_mode))
21431 operands[2] = force_reg (data_mode, operands[2]);
21433 /* XOP supports all of the comparisons on all 128-bit vector int types. */
21434 if (TARGET_XOP
21435 && (mode == V16QImode || mode == V8HImode
21436 || mode == V4SImode || mode == V2DImode))
21438 else
21440 /* Canonicalize the comparison to EQ, GT, GTU. */
21441 switch (code)
21443 case EQ:
21444 case GT:
21445 case GTU:
21446 break;
21448 case NE:
21449 case LE:
21450 case LEU:
21451 code = reverse_condition (code);
21452 negate = true;
21453 break;
21455 case GE:
21456 case GEU:
21457 code = reverse_condition (code);
21458 negate = true;
21459 /* FALLTHRU */
21461 case LT:
21462 case LTU:
21463 code = swap_condition (code);
21464 x = cop0, cop0 = cop1, cop1 = x;
21465 break;
21467 default:
21468 gcc_unreachable ();
21471 /* Only SSE4.1/SSE4.2 supports V2DImode. */
21472 if (mode == V2DImode)
21474 switch (code)
21476 case EQ:
21477 /* SSE4.1 supports EQ. */
21478 if (!TARGET_SSE4_1)
21479 return false;
21480 break;
21482 case GT:
21483 case GTU:
21484 /* SSE4.2 supports GT/GTU. */
21485 if (!TARGET_SSE4_2)
21486 return false;
21487 break;
21489 default:
21490 gcc_unreachable ();
21494 /* Unsigned parallel compare is not supported by the hardware.
21495 Play some tricks to turn this into a signed comparison
21496 against 0. */
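/* The trick (illustrative): a >u b iff (a - 0x80000000) >s (b - 0x80000000),
   i.e. flipping the sign bit of both operands turns an unsigned comparison
   into a signed one.  E.g. a = 0xffffffff, b = 1: after the bias a becomes
   0x7fffffff and b becomes 0x80000001, and the signed compare is true, just
   as the unsigned one was.  */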
21497 if (code == GTU)
21499 cop0 = force_reg (mode, cop0);
21501 switch (mode)
21503 case V16SImode:
21504 case V8DImode:
21505 case V8SImode:
21506 case V4DImode:
21507 case V4SImode:
21508 case V2DImode:
21510 rtx t1, t2, mask;
21511 rtx (*gen_sub3) (rtx, rtx, rtx);
21513 switch (mode)
21515 case V16SImode: gen_sub3 = gen_subv16si3; break;
21516 case V8DImode: gen_sub3 = gen_subv8di3; break;
21517 case V8SImode: gen_sub3 = gen_subv8si3; break;
21518 case V4DImode: gen_sub3 = gen_subv4di3; break;
21519 case V4SImode: gen_sub3 = gen_subv4si3; break;
21520 case V2DImode: gen_sub3 = gen_subv2di3; break;
21521 default:
21522 gcc_unreachable ();
21524 /* Subtract (-(INT MAX) - 1) from both operands to make
21525 them signed. */
21526 mask = ix86_build_signbit_mask (mode, true, false);
21527 t1 = gen_reg_rtx (mode);
21528 emit_insn (gen_sub3 (t1, cop0, mask));
21530 t2 = gen_reg_rtx (mode);
21531 emit_insn (gen_sub3 (t2, cop1, mask));
21533 cop0 = t1;
21534 cop1 = t2;
21535 code = GT;
21537 break;
21539 case V32QImode:
21540 case V16HImode:
21541 case V16QImode:
21542 case V8HImode:
21543 /* Perform a parallel unsigned saturating subtraction. */
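/* Illustrative identity: for unsigned elements, a >u b iff the saturating
   subtraction (a -us b) is non-zero, so the comparison becomes an EQ
   against zero with the result negated.  E.g. 5 -us 9 = 0 (5 >u 9 is
   false) while 9 -us 5 = 4 != 0 (9 >u 5 is true).  */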
21544 x = gen_reg_rtx (mode);
21545 emit_insn (gen_rtx_SET (VOIDmode, x,
21546 gen_rtx_US_MINUS (mode, cop0, cop1)));
21548 cop0 = x;
21549 cop1 = CONST0_RTX (mode);
21550 code = EQ;
21551 negate = !negate;
21552 break;
21554 default:
21555 gcc_unreachable ();
21560 /* Allow the comparison to be done in one mode, but the movcc to
21561 happen in another mode. */
21562 if (data_mode == mode)
21564 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
21565 operands[1+negate], operands[2-negate]);
21567 else
21569 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
21570 x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
21571 operands[1+negate], operands[2-negate]);
21572 if (GET_MODE (x) == mode)
21573 x = gen_lowpart (data_mode, x);
21576 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
21577 operands[2-negate]);
21578 return true;
21581 static bool
21582 ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1)
21584 enum machine_mode mode = GET_MODE (op0);
21585 switch (mode)
21587 case V16SImode:
21588 emit_insn (gen_avx512f_vpermi2varv16si3 (target, op0,
21589 force_reg (V16SImode, mask),
21590 op1));
21591 return true;
21592 case V16SFmode:
21593 emit_insn (gen_avx512f_vpermi2varv16sf3 (target, op0,
21594 force_reg (V16SImode, mask),
21595 op1));
21596 return true;
21597 case V8DImode:
21598 emit_insn (gen_avx512f_vpermi2varv8di3 (target, op0,
21599 force_reg (V8DImode, mask), op1));
21600 return true;
21601 case V8DFmode:
21602 emit_insn (gen_avx512f_vpermi2varv8df3 (target, op0,
21603 force_reg (V8DImode, mask), op1));
21604 return true;
21605 default:
21606 return false;
21610 /* Expand a variable vector permutation. */
21612 void
21613 ix86_expand_vec_perm (rtx operands[])
21615 rtx target = operands[0];
21616 rtx op0 = operands[1];
21617 rtx op1 = operands[2];
21618 rtx mask = operands[3];
21619 rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
21620 enum machine_mode mode = GET_MODE (op0);
21621 enum machine_mode maskmode = GET_MODE (mask);
21622 int w, e, i;
21623 bool one_operand_shuffle = rtx_equal_p (op0, op1);
21625 /* Number of elements in the vector. */
21626 w = GET_MODE_NUNITS (mode);
21627 e = GET_MODE_UNIT_SIZE (mode);
21628 gcc_assert (w <= 64);
21630 if (ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1))
21631 return;
21633 if (TARGET_AVX2)
21635 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
21637 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
21638 a constant shuffle operand. With a tiny bit of effort we can
21639 use VPERMD instead. A re-interpretation stall for V4DFmode is
21640 unfortunate but there's no avoiding it.
21641 Similarly, for V16HImode we don't have instructions for variable
21642 shuffling, while for V32QImode we can use the sequence
21643 vpshufb; vpshufb; vpermq; vpor after preparing suitable masks. */
21645 if (mode == V16HImode)
21647 maskmode = mode = V32QImode;
21648 w = 32;
21649 e = 1;
21651 else
21653 maskmode = mode = V8SImode;
21654 w = 8;
21655 e = 4;
21657 t1 = gen_reg_rtx (maskmode);
21659 /* Replicate the low bits of the V4DImode mask into V8SImode:
21660 mask = { A B C D }
21661 t1 = { A A B B C C D D }. */
21662 for (i = 0; i < w / 2; ++i)
21663 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
21664 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21665 vt = force_reg (maskmode, vt);
21666 mask = gen_lowpart (maskmode, mask);
21667 if (maskmode == V8SImode)
21668 emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
21669 else
21670 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
21672 /* Multiply the shuffle indices by two. */
21673 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
21674 OPTAB_DIRECT);
21676 /* Add one to the odd shuffle indices:
21677 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
21678 for (i = 0; i < w / 2; ++i)
21680 vec[i * 2] = const0_rtx;
21681 vec[i * 2 + 1] = const1_rtx;
21683 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21684 vt = validize_mem (force_const_mem (maskmode, vt));
21685 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
21686 OPTAB_DIRECT);
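/* Worked example (illustrative): a V4DImode mask {1, 3, 0, 2} is replicated
   to {1,1,3,3,0,0,2,2}, doubled to {2,2,6,6,0,0,4,4}, and after the addition
   of {0,1,0,1,...} becomes the V8SImode control {2,3,6,7,0,1,4,5} - each
   64-bit index j now selects the pair of 32-bit elements 2j and 2j+1.  */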
21688 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
21689 operands[3] = mask = t1;
21690 target = gen_reg_rtx (mode);
21691 op0 = gen_lowpart (mode, op0);
21692 op1 = gen_lowpart (mode, op1);
21695 switch (mode)
21697 case V8SImode:
21698 /* The VPERMD and VPERMPS instructions already properly ignore
21699 the high bits of the shuffle elements. No need for us to
21700 perform an AND ourselves. */
21701 if (one_operand_shuffle)
21703 emit_insn (gen_avx2_permvarv8si (target, op0, mask));
21704 if (target != operands[0])
21705 emit_move_insn (operands[0],
21706 gen_lowpart (GET_MODE (operands[0]), target));
21708 else
21710 t1 = gen_reg_rtx (V8SImode);
21711 t2 = gen_reg_rtx (V8SImode);
21712 emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
21713 emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
21714 goto merge_two;
21716 return;
21718 case V8SFmode:
21719 mask = gen_lowpart (V8SImode, mask);
21720 if (one_operand_shuffle)
21721 emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
21722 else
21724 t1 = gen_reg_rtx (V8SFmode);
21725 t2 = gen_reg_rtx (V8SFmode);
21726 emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
21727 emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
21728 goto merge_two;
21730 return;
21732 case V4SImode:
21733 /* By combining the two 128-bit input vectors into one 256-bit
21734 input vector, we can use VPERMD and VPERMPS for the full
21735 two-operand shuffle. */
21736 t1 = gen_reg_rtx (V8SImode);
21737 t2 = gen_reg_rtx (V8SImode);
21738 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
21739 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
21740 emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
21741 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
21742 return;
21744 case V4SFmode:
21745 t1 = gen_reg_rtx (V8SFmode);
21746 t2 = gen_reg_rtx (V8SImode);
21747 mask = gen_lowpart (V4SImode, mask);
21748 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
21749 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
21750 emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
21751 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
21752 return;
21754 case V32QImode:
21755 t1 = gen_reg_rtx (V32QImode);
21756 t2 = gen_reg_rtx (V32QImode);
21757 t3 = gen_reg_rtx (V32QImode);
21758 vt2 = GEN_INT (128);
21759 for (i = 0; i < 32; i++)
21760 vec[i] = vt2;
21761 vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
21762 vt = force_reg (V32QImode, vt);
21763 for (i = 0; i < 32; i++)
21764 vec[i] = i < 16 ? vt2 : const0_rtx;
21765 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
21766 vt2 = force_reg (V32QImode, vt2);
21767 /* From mask create two adjusted masks, which contain the same
21768 bits as mask in the low 7 bits of each vector element.
21769 The first mask will have the most significant bit clear
21770 if it requests element from the same 128-bit lane
21771 and MSB set if it requests element from the other 128-bit lane.
21772 The second mask will have the opposite values of the MSB,
21773 and additionally will have its 128-bit lanes swapped.
21774 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
21775 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
21776 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
21777 stands for the other 12 bytes. */
21778 /* The bit that says whether an element comes from the same lane or the
21779 other lane is bit 4, so shift it up by 3 to the MSB position. */
21780 t5 = gen_reg_rtx (V4DImode);
21781 emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
21782 GEN_INT (3)));
21783 /* Clear MSB bits from the mask just in case it had them set. */
21784 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
21785 /* After this t1 will have MSB set for elements from other lane. */
21786 emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
21787 /* Clear bits other than MSB. */
21788 emit_insn (gen_andv32qi3 (t1, t1, vt));
21789 /* Or in the lower bits from mask into t3. */
21790 emit_insn (gen_iorv32qi3 (t3, t1, t2));
21791 /* And invert MSB bits in t1, so MSB is set for elements from the same
21792 lane. */
21793 emit_insn (gen_xorv32qi3 (t1, t1, vt));
21794 /* Swap 128-bit lanes in t3. */
21795 t6 = gen_reg_rtx (V4DImode);
21796 emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
21797 const2_rtx, GEN_INT (3),
21798 const0_rtx, const1_rtx));
21799 /* And or in the lower bits from mask into t1. */
21800 emit_insn (gen_iorv32qi3 (t1, t1, t2));
21801 if (one_operand_shuffle)
21803 /* Each of these shuffles will put 0s in places where
21804 element from the other 128-bit lane is needed, otherwise
21805 will shuffle in the requested value. */
21806 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
21807 gen_lowpart (V32QImode, t6)));
21808 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
21809 /* For t3 the 128-bit lanes are swapped again. */
21810 t7 = gen_reg_rtx (V4DImode);
21811 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
21812 const2_rtx, GEN_INT (3),
21813 const0_rtx, const1_rtx));
21814 /* And oring both together leads to the result. */
21815 emit_insn (gen_iorv32qi3 (target, t1,
21816 gen_lowpart (V32QImode, t7)));
21817 if (target != operands[0])
21818 emit_move_insn (operands[0],
21819 gen_lowpart (GET_MODE (operands[0]), target));
21820 return;
21823 t4 = gen_reg_rtx (V32QImode);
21824 /* Similar to the one_operand_shuffle code above,
21825 just repeated twice, once for each operand. The merge_two:
21826 code will merge the two results together. */
21827 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
21828 gen_lowpart (V32QImode, t6)));
21829 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
21830 gen_lowpart (V32QImode, t6)));
21831 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
21832 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
21833 t7 = gen_reg_rtx (V4DImode);
21834 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
21835 const2_rtx, GEN_INT (3),
21836 const0_rtx, const1_rtx));
21837 t8 = gen_reg_rtx (V4DImode);
21838 emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
21839 const2_rtx, GEN_INT (3),
21840 const0_rtx, const1_rtx));
21841 emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
21842 emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
21843 t1 = t4;
21844 t2 = t3;
21845 goto merge_two;
21847 default:
21848 gcc_assert (GET_MODE_SIZE (mode) <= 16);
21849 break;
21853 if (TARGET_XOP)
21855 /* The XOP VPPERM insn supports three inputs. By ignoring the
21856 one_operand_shuffle special case, we avoid creating another
21857 set of constant vectors in memory. */
21858 one_operand_shuffle = false;
21860 /* mask = mask & {2*w-1, ...} */
21861 vt = GEN_INT (2*w - 1);
21863 else
21865 /* mask = mask & {w-1, ...} */
21866 vt = GEN_INT (w - 1);
21869 for (i = 0; i < w; i++)
21870 vec[i] = vt;
21871 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21872 mask = expand_simple_binop (maskmode, AND, mask, vt,
21873 NULL_RTX, 0, OPTAB_DIRECT);
21875 /* For non-QImode operations, convert the word permutation control
21876 into a byte permutation control. */
21877 if (mode != V16QImode)
21879 mask = expand_simple_binop (maskmode, ASHIFT, mask,
21880 GEN_INT (exact_log2 (e)),
21881 NULL_RTX, 0, OPTAB_DIRECT);
21883 /* Convert mask to vector of chars. */
21884 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
21886 /* Replicate each of the input bytes into byte positions:
21887 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
21888 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
21889 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
21890 for (i = 0; i < 16; ++i)
21891 vec[i] = GEN_INT (i/e * e);
21892 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
21893 vt = validize_mem (force_const_mem (V16QImode, vt));
21894 if (TARGET_XOP)
21895 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
21896 else
21897 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
21899 /* Convert it into the byte positions by doing
21900 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
21901 for (i = 0; i < 16; ++i)
21902 vec[i] = GEN_INT (i % e);
21903 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
21904 vt = validize_mem (force_const_mem (V16QImode, vt));
21905 emit_insn (gen_addv16qi3 (mask, mask, vt));
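/* Worked example (illustrative, V4SImode, e = 4): a word index of 2 becomes
   8 after the shift, is replicated to {8,8,8,8} across its element by the
   pshufb above, and the final addition of {0,1,2,3} yields the byte control
   {8,9,10,11} - exactly the bytes of source element 2.  */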
21908 /* The actual shuffle operations all operate on V16QImode. */
21909 op0 = gen_lowpart (V16QImode, op0);
21910 op1 = gen_lowpart (V16QImode, op1);
21912 if (TARGET_XOP)
21914 if (GET_MODE (target) != V16QImode)
21915 target = gen_reg_rtx (V16QImode);
21916 emit_insn (gen_xop_pperm (target, op0, op1, mask));
21917 if (target != operands[0])
21918 emit_move_insn (operands[0],
21919 gen_lowpart (GET_MODE (operands[0]), target));
21921 else if (one_operand_shuffle)
21923 if (GET_MODE (target) != V16QImode)
21924 target = gen_reg_rtx (V16QImode);
21925 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
21926 if (target != operands[0])
21927 emit_move_insn (operands[0],
21928 gen_lowpart (GET_MODE (operands[0]), target));
21930 else
21932 rtx xops[6];
21933 bool ok;
21935 /* Shuffle the two input vectors independently. */
21936 t1 = gen_reg_rtx (V16QImode);
21937 t2 = gen_reg_rtx (V16QImode);
21938 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
21939 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
21941 merge_two:
21942 /* Then merge them together. The key is whether any given control
21943 element contained a bit set that indicates the second word. */
21944 mask = operands[3];
21945 vt = GEN_INT (w);
21946 if (maskmode == V2DImode && !TARGET_SSE4_1)
21948 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
21949 more shuffle to convert the V2DI input mask into a V4SI
21950 input mask. At that point the masking that ix86_expand_int_vcond
21951 performs will work as desired. */
21952 rtx t3 = gen_reg_rtx (V4SImode);
21953 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
21954 const0_rtx, const0_rtx,
21955 const2_rtx, const2_rtx));
21956 mask = t3;
21957 maskmode = V4SImode;
21958 e = w = 4;
21961 for (i = 0; i < w; i++)
21962 vec[i] = vt;
21963 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21964 vt = force_reg (maskmode, vt);
21965 mask = expand_simple_binop (maskmode, AND, mask, vt,
21966 NULL_RTX, 0, OPTAB_DIRECT);
21968 if (GET_MODE (target) != mode)
21969 target = gen_reg_rtx (mode);
21970 xops[0] = target;
21971 xops[1] = gen_lowpart (mode, t2);
21972 xops[2] = gen_lowpart (mode, t1);
21973 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
21974 xops[4] = mask;
21975 xops[5] = vt;
21976 ok = ix86_expand_int_vcond (xops);
21977 gcc_assert (ok);
21978 if (target != operands[0])
21979 emit_move_insn (operands[0],
21980 gen_lowpart (GET_MODE (operands[0]), target));
21984 /* Unpack SRC into DEST, widening to the next wider integer vector type. UNSIGNED_P is
21985 true if we should do zero extension, else sign extension. HIGH_P is
21986 true if we want the N/2 high elements, else the low elements. */
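/* Example (illustrative): unpacking a V16QImode source to V8HImode uses
   pmovzxbw/pmovsxbw on the low 8 bytes when SSE4.1 is available (for HIGH_P
   the high 8 bytes are first shifted down); otherwise the same effect is
   obtained by interleaving with zero (unsigned) or with a mask of the sign
   bits (signed).  */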
21988 void
21989 ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
21991 enum machine_mode imode = GET_MODE (src);
21992 rtx tmp;
21994 if (TARGET_SSE4_1)
21996 rtx (*unpack)(rtx, rtx);
21997 rtx (*extract)(rtx, rtx) = NULL;
21998 enum machine_mode halfmode = BLKmode;
22000 switch (imode)
22002 case V32QImode:
22003 if (unsigned_p)
22004 unpack = gen_avx2_zero_extendv16qiv16hi2;
22005 else
22006 unpack = gen_avx2_sign_extendv16qiv16hi2;
22007 halfmode = V16QImode;
22008 extract
22009 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
22010 break;
22011 case V32HImode:
22012 if (unsigned_p)
22013 unpack = gen_avx512f_zero_extendv16hiv16si2;
22014 else
22015 unpack = gen_avx512f_sign_extendv16hiv16si2;
22016 halfmode = V16HImode;
22017 extract
22018 = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
22019 break;
22020 case V16HImode:
22021 if (unsigned_p)
22022 unpack = gen_avx2_zero_extendv8hiv8si2;
22023 else
22024 unpack = gen_avx2_sign_extendv8hiv8si2;
22025 halfmode = V8HImode;
22026 extract
22027 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
22028 break;
22029 case V16SImode:
22030 if (unsigned_p)
22031 unpack = gen_avx512f_zero_extendv8siv8di2;
22032 else
22033 unpack = gen_avx512f_sign_extendv8siv8di2;
22034 halfmode = V8SImode;
22035 extract
22036 = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
22037 break;
22038 case V8SImode:
22039 if (unsigned_p)
22040 unpack = gen_avx2_zero_extendv4siv4di2;
22041 else
22042 unpack = gen_avx2_sign_extendv4siv4di2;
22043 halfmode = V4SImode;
22044 extract
22045 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
22046 break;
22047 case V16QImode:
22048 if (unsigned_p)
22049 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
22050 else
22051 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
22052 break;
22053 case V8HImode:
22054 if (unsigned_p)
22055 unpack = gen_sse4_1_zero_extendv4hiv4si2;
22056 else
22057 unpack = gen_sse4_1_sign_extendv4hiv4si2;
22058 break;
22059 case V4SImode:
22060 if (unsigned_p)
22061 unpack = gen_sse4_1_zero_extendv2siv2di2;
22062 else
22063 unpack = gen_sse4_1_sign_extendv2siv2di2;
22064 break;
22065 default:
22066 gcc_unreachable ();
22069 if (GET_MODE_SIZE (imode) >= 32)
22071 tmp = gen_reg_rtx (halfmode);
22072 emit_insn (extract (tmp, src));
22074 else if (high_p)
22076 /* Shift higher 8 bytes to lower 8 bytes. */
22077 tmp = gen_reg_rtx (V1TImode);
22078 emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
22079 GEN_INT (64)));
22080 tmp = gen_lowpart (imode, tmp);
22082 else
22083 tmp = src;
22085 emit_insn (unpack (dest, tmp));
22087 else
22089 rtx (*unpack)(rtx, rtx, rtx);
22091 switch (imode)
22093 case V16QImode:
22094 if (high_p)
22095 unpack = gen_vec_interleave_highv16qi;
22096 else
22097 unpack = gen_vec_interleave_lowv16qi;
22098 break;
22099 case V8HImode:
22100 if (high_p)
22101 unpack = gen_vec_interleave_highv8hi;
22102 else
22103 unpack = gen_vec_interleave_lowv8hi;
22104 break;
22105 case V4SImode:
22106 if (high_p)
22107 unpack = gen_vec_interleave_highv4si;
22108 else
22109 unpack = gen_vec_interleave_lowv4si;
22110 break;
22111 default:
22112 gcc_unreachable ();
22115 if (unsigned_p)
22116 tmp = force_reg (imode, CONST0_RTX (imode));
22117 else
22118 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
22119 src, pc_rtx, pc_rtx);
22121 rtx tmp2 = gen_reg_rtx (imode);
22122 emit_insn (unpack (tmp2, src, tmp));
22123 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
22127 /* Expand conditional increment or decrement using adc/sbb instructions.
22128 The default case using setcc followed by the conditional move can be
22129 done by generic code. */
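/* Illustrative example (hypothetical source expression): for unsigned a, b,
   x = (a < b) ? x + 1 : x
   the compare already leaves the condition in the carry flag, so the whole
   statement becomes a cmp followed by "adc $0" - no setcc or cmov needed.  */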
22130 bool
22131 ix86_expand_int_addcc (rtx operands[])
22133 enum rtx_code code = GET_CODE (operands[1]);
22134 rtx flags;
22135 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
22136 rtx compare_op;
22137 rtx val = const0_rtx;
22138 bool fpcmp = false;
22139 enum machine_mode mode;
22140 rtx op0 = XEXP (operands[1], 0);
22141 rtx op1 = XEXP (operands[1], 1);
22143 if (operands[3] != const1_rtx
22144 && operands[3] != constm1_rtx)
22145 return false;
22146 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
22147 return false;
22148 code = GET_CODE (compare_op);
22150 flags = XEXP (compare_op, 0);
22152 if (GET_MODE (flags) == CCFPmode
22153 || GET_MODE (flags) == CCFPUmode)
22155 fpcmp = true;
22156 code = ix86_fp_compare_code_to_integer (code);
22159 if (code != LTU)
22161 val = constm1_rtx;
22162 if (fpcmp)
22163 PUT_CODE (compare_op,
22164 reverse_condition_maybe_unordered
22165 (GET_CODE (compare_op)));
22166 else
22167 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
22170 mode = GET_MODE (operands[0]);
22172 /* Construct either adc or sbb insn. */
22173 if ((code == LTU) == (operands[3] == constm1_rtx))
22175 switch (mode)
22177 case QImode:
22178 insn = gen_subqi3_carry;
22179 break;
22180 case HImode:
22181 insn = gen_subhi3_carry;
22182 break;
22183 case SImode:
22184 insn = gen_subsi3_carry;
22185 break;
22186 case DImode:
22187 insn = gen_subdi3_carry;
22188 break;
22189 default:
22190 gcc_unreachable ();
22193 else
22195 switch (mode)
22197 case QImode:
22198 insn = gen_addqi3_carry;
22199 break;
22200 case HImode:
22201 insn = gen_addhi3_carry;
22202 break;
22203 case SImode:
22204 insn = gen_addsi3_carry;
22205 break;
22206 case DImode:
22207 insn = gen_adddi3_carry;
22208 break;
22209 default:
22210 gcc_unreachable ();
22213 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
22215 return true;
22219 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
22220 but works for floating point parameters and non-offsettable memories.
22221 For pushes, it returns just stack offsets; the values will be saved
22222 in the right order. At most four parts are generated. */
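/* For example (illustrative): on a 32-bit target a DFmode value splits into
   two SImode parts and an XFmode value into three; on a 64-bit target an
   XFmode value splits into a DImode part plus an SImode upper part.  */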
22224 static int
22225 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
22227 int size;
22229 if (!TARGET_64BIT)
22230 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
22231 else
22232 size = (GET_MODE_SIZE (mode) + 4) / 8;
22234 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
22235 gcc_assert (size >= 2 && size <= 4);
22237 /* Optimize constant pool reference to immediates. This is used by fp
22238 moves, that force all constants to memory to allow combining. */
22239 if (MEM_P (operand) && MEM_READONLY_P (operand))
22241 rtx tmp = maybe_get_pool_constant (operand);
22242 if (tmp)
22243 operand = tmp;
22246 if (MEM_P (operand) && !offsettable_memref_p (operand))
22248 /* The only non-offsettable memories we handle are pushes. */
22249 int ok = push_operand (operand, VOIDmode);
22251 gcc_assert (ok);
22253 operand = copy_rtx (operand);
22254 PUT_MODE (operand, word_mode);
22255 parts[0] = parts[1] = parts[2] = parts[3] = operand;
22256 return size;
22259 if (GET_CODE (operand) == CONST_VECTOR)
22261 enum machine_mode imode = int_mode_for_mode (mode);
22262 /* Caution: if we looked through a constant pool memory above,
22263 the operand may actually have a different mode now. That's
22264 ok, since we want to pun this all the way back to an integer. */
22265 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
22266 gcc_assert (operand != NULL);
22267 mode = imode;
22270 if (!TARGET_64BIT)
22272 if (mode == DImode)
22273 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22274 else
22276 int i;
22278 if (REG_P (operand))
22280 gcc_assert (reload_completed);
22281 for (i = 0; i < size; i++)
22282 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
22284 else if (offsettable_memref_p (operand))
22286 operand = adjust_address (operand, SImode, 0);
22287 parts[0] = operand;
22288 for (i = 1; i < size; i++)
22289 parts[i] = adjust_address (operand, SImode, 4 * i);
22291 else if (GET_CODE (operand) == CONST_DOUBLE)
22293 REAL_VALUE_TYPE r;
22294 long l[4];
22296 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22297 switch (mode)
22299 case TFmode:
22300 real_to_target (l, &r, mode);
22301 parts[3] = gen_int_mode (l[3], SImode);
22302 parts[2] = gen_int_mode (l[2], SImode);
22303 break;
22304 case XFmode:
22305 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
22306 long double may not be 80-bit. */
22307 real_to_target (l, &r, mode);
22308 parts[2] = gen_int_mode (l[2], SImode);
22309 break;
22310 case DFmode:
22311 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
22312 break;
22313 default:
22314 gcc_unreachable ();
22316 parts[1] = gen_int_mode (l[1], SImode);
22317 parts[0] = gen_int_mode (l[0], SImode);
22319 else
22320 gcc_unreachable ();
22323 else
22325 if (mode == TImode)
22326 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22327 if (mode == XFmode || mode == TFmode)
22329 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
22330 if (REG_P (operand))
22332 gcc_assert (reload_completed);
22333 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
22334 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
22336 else if (offsettable_memref_p (operand))
22338 operand = adjust_address (operand, DImode, 0);
22339 parts[0] = operand;
22340 parts[1] = adjust_address (operand, upper_mode, 8);
22342 else if (GET_CODE (operand) == CONST_DOUBLE)
22344 REAL_VALUE_TYPE r;
22345 long l[4];
22347 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22348 real_to_target (l, &r, mode);
22350 /* Do not use shift by 32 to avoid warning on 32bit systems. */
22351 if (HOST_BITS_PER_WIDE_INT >= 64)
22352 parts[0]
22353 = gen_int_mode
22354 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
22355 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
22356 DImode);
22357 else
22358 parts[0] = immed_double_const (l[0], l[1], DImode);
22360 if (upper_mode == SImode)
22361 parts[1] = gen_int_mode (l[2], SImode);
22362 else if (HOST_BITS_PER_WIDE_INT >= 64)
22363 parts[1]
22364 = gen_int_mode
22365 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
22366 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
22367 DImode);
22368 else
22369 parts[1] = immed_double_const (l[2], l[3], DImode);
22371 else
22372 gcc_unreachable ();
22376 return size;
22379 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
22380 All required insns are emitted here; the caller has nothing more to do.
22381 Operands 2 onwards receive the destination parts and operands 6 onwards
22382 the corresponding source parts, in the correct order. */
22384 void
22385 ix86_split_long_move (rtx operands[])
22387 rtx part[2][4];
22388 int nparts, i, j;
22389 int push = 0;
22390 int collisions = 0;
22391 enum machine_mode mode = GET_MODE (operands[0]);
22392 bool collisionparts[4];
22394 /* The DFmode expanders may ask us to move a double.
22395 For a 64-bit target this is a single move. By hiding that fact
22396 here we simplify the i386.md splitters. */
22397 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
22399 /* Optimize constant pool reference to immediates. This is used by
22400 fp moves, that force all constants to memory to allow combining. */
22402 if (MEM_P (operands[1])
22403 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
22404 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
22405 operands[1] = get_pool_constant (XEXP (operands[1], 0));
22406 if (push_operand (operands[0], VOIDmode))
22408 operands[0] = copy_rtx (operands[0]);
22409 PUT_MODE (operands[0], word_mode);
22411 else
22412 operands[0] = gen_lowpart (DImode, operands[0]);
22413 operands[1] = gen_lowpart (DImode, operands[1]);
22414 emit_move_insn (operands[0], operands[1]);
22415 return;
22418 /* The only non-offsettable memory we handle is push. */
22419 if (push_operand (operands[0], VOIDmode))
22420 push = 1;
22421 else
22422 gcc_assert (!MEM_P (operands[0])
22423 || offsettable_memref_p (operands[0]));
22425 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
22426 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
22428 /* When emitting push, take care for source operands on the stack. */
22429 if (push && MEM_P (operands[1])
22430 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
22432 rtx src_base = XEXP (part[1][nparts - 1], 0);
22434 /* Compensate for the stack decrement by 4. */
22435 if (!TARGET_64BIT && nparts == 3
22436 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
22437 src_base = plus_constant (Pmode, src_base, 4);
22439 /* src_base refers to the stack pointer and is
22440 automatically decreased by emitted push. */
22441 for (i = 0; i < nparts; i++)
22442 part[1][i] = change_address (part[1][i],
22443 GET_MODE (part[1][i]), src_base);
22446 /* We need to do copy in the right order in case an address register
22447 of the source overlaps the destination. */
22448 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
22450 rtx tmp;
22452 for (i = 0; i < nparts; i++)
22454 collisionparts[i]
22455 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
22456 if (collisionparts[i])
22457 collisions++;
22460 /* Collision in the middle part can be handled by reordering. */
22461 if (collisions == 1 && nparts == 3 && collisionparts [1])
22463 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
22464 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
22466 else if (collisions == 1
22467 && nparts == 4
22468 && (collisionparts [1] || collisionparts [2]))
22470 if (collisionparts [1])
22472 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
22473 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
22475 else
22477 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
22478 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
22482 /* If there are more collisions, we can't handle it by reordering.
22483 Do an lea to the last part and use only one colliding move. */
22484 else if (collisions > 1)
22486 rtx base;
22488 collisions = 1;
22490 base = part[0][nparts - 1];
22492 /* Handle the case when the last part isn't valid for lea.
22493 Happens in 64-bit mode storing the 12-byte XFmode. */
22494 if (GET_MODE (base) != Pmode)
22495 base = gen_rtx_REG (Pmode, REGNO (base));
22497 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
22498 part[1][0] = replace_equiv_address (part[1][0], base);
22499 for (i = 1; i < nparts; i++)
22501 tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
22502 part[1][i] = replace_equiv_address (part[1][i], tmp);
22507 if (push)
22509 if (!TARGET_64BIT)
22511 if (nparts == 3)
22513 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
22514 emit_insn (ix86_gen_add3 (stack_pointer_rtx,
22515 stack_pointer_rtx, GEN_INT (-4)));
22516 emit_move_insn (part[0][2], part[1][2]);
22518 else if (nparts == 4)
22520 emit_move_insn (part[0][3], part[1][3]);
22521 emit_move_insn (part[0][2], part[1][2]);
22524 else
22526 /* In 64-bit mode we don't have a 32-bit push available. If the operand is
22527 a register, that is OK - we just use the larger counterpart. We also
22528 retype memory operands - these come from an attempt to avoid a REX
22529 prefix when moving the second half of a TFmode value. */
22530 if (GET_MODE (part[1][1]) == SImode)
22532 switch (GET_CODE (part[1][1]))
22534 case MEM:
22535 part[1][1] = adjust_address (part[1][1], DImode, 0);
22536 break;
22538 case REG:
22539 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
22540 break;
22542 default:
22543 gcc_unreachable ();
22546 if (GET_MODE (part[1][0]) == SImode)
22547 part[1][0] = part[1][1];
22550 emit_move_insn (part[0][1], part[1][1]);
22551 emit_move_insn (part[0][0], part[1][0]);
22552 return;
22555 /* Choose correct order to not overwrite the source before it is copied. */
22556 if ((REG_P (part[0][0])
22557 && REG_P (part[1][1])
22558 && (REGNO (part[0][0]) == REGNO (part[1][1])
22559 || (nparts == 3
22560 && REGNO (part[0][0]) == REGNO (part[1][2]))
22561 || (nparts == 4
22562 && REGNO (part[0][0]) == REGNO (part[1][3]))))
22563 || (collisions > 0
22564 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
22566 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
22568 operands[2 + i] = part[0][j];
22569 operands[6 + i] = part[1][j];
22572 else
22574 for (i = 0; i < nparts; i++)
22576 operands[2 + i] = part[0][i];
22577 operands[6 + i] = part[1][i];
22581 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
22582 if (optimize_insn_for_size_p ())
22584 for (j = 0; j < nparts - 1; j++)
22585 if (CONST_INT_P (operands[6 + j])
22586 && operands[6 + j] != const0_rtx
22587 && REG_P (operands[2 + j]))
22588 for (i = j; i < nparts - 1; i++)
22589 if (CONST_INT_P (operands[7 + i])
22590 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
22591 operands[7 + i] = operands[2 + j];
22594 for (i = 0; i < nparts; i++)
22595 emit_move_insn (operands[2 + i], operands[6 + i]);
22597 return;
22600 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
22601 left shift by a constant, either using a single shift or
22602 a sequence of add instructions. */
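/* Example (illustrative): a left shift of a 32-bit half by 2 can be emitted
   as two "addl %reg, %reg" instructions (each doubling the value) when two
   adds are cheaper than one shift-by-constant and we are not optimizing for
   size.  */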
22604 static void
22605 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
22607 rtx (*insn)(rtx, rtx, rtx);
22609 if (count == 1
22610 || (count * ix86_cost->add <= ix86_cost->shift_const
22611 && !optimize_insn_for_size_p ()))
22613 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
22614 while (count-- > 0)
22615 emit_insn (insn (operand, operand, operand));
22617 else
22619 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
22620 emit_insn (insn (operand, operand, GEN_INT (count)));
22624 void
22625 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
22627 rtx (*gen_ashl3)(rtx, rtx, rtx);
22628 rtx (*gen_shld)(rtx, rtx, rtx);
22629 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22631 rtx low[2], high[2];
22632 int count;
22634 if (CONST_INT_P (operands[2]))
22636 split_double_mode (mode, operands, 2, low, high);
22637 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22639 if (count >= half_width)
22641 emit_move_insn (high[0], low[1]);
22642 emit_move_insn (low[0], const0_rtx);
22644 if (count > half_width)
22645 ix86_expand_ashl_const (high[0], count - half_width, mode);
22647 else
22649 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
22651 if (!rtx_equal_p (operands[0], operands[1]))
22652 emit_move_insn (operands[0], operands[1]);
22654 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
22655 ix86_expand_ashl_const (low[0], count, mode);
22657 return;
22660 split_double_mode (mode, operands, 1, low, high);
22662 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
22664 if (operands[1] == const1_rtx)
22666 /* Assuming we've chosen QImode-capable registers, then 1 << N
22667 can be done with two 32/64-bit shifts, no branches, no cmoves. */
22668 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
22670 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
22672 ix86_expand_clear (low[0]);
22673 ix86_expand_clear (high[0]);
22674 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
22676 d = gen_lowpart (QImode, low[0]);
22677 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
22678 s = gen_rtx_EQ (QImode, flags, const0_rtx);
22679 emit_insn (gen_rtx_SET (VOIDmode, d, s));
22681 d = gen_lowpart (QImode, high[0]);
22682 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
22683 s = gen_rtx_NE (QImode, flags, const0_rtx);
22684 emit_insn (gen_rtx_SET (VOIDmode, d, s));
22687 /* Otherwise, we can get the same results by manually performing
22688 a bit extract operation on bit 5/6, and then performing the two
22689 shifts. The two methods of getting 0/1 into low/high are exactly
22690 the same size. Avoiding the shift in the bit extract case helps
22691 pentium4 a bit; no one else seems to care much either way. */
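/* Worked example (illustrative, DImode on a 32-bit target): bit 5 of the
   count selects which half receives the 1, so effectively
     high = (count >> 5) & 1;  low = high ^ 1;
   and then both halves are shifted left by the count (the 32-bit shifts
   only use the low 5 bits of the count).  */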
22692 else
22694 enum machine_mode half_mode;
22695 rtx (*gen_lshr3)(rtx, rtx, rtx);
22696 rtx (*gen_and3)(rtx, rtx, rtx);
22697 rtx (*gen_xor3)(rtx, rtx, rtx);
22698 HOST_WIDE_INT bits;
22699 rtx x;
22701 if (mode == DImode)
22703 half_mode = SImode;
22704 gen_lshr3 = gen_lshrsi3;
22705 gen_and3 = gen_andsi3;
22706 gen_xor3 = gen_xorsi3;
22707 bits = 5;
22709 else
22711 half_mode = DImode;
22712 gen_lshr3 = gen_lshrdi3;
22713 gen_and3 = gen_anddi3;
22714 gen_xor3 = gen_xordi3;
22715 bits = 6;
22718 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
22719 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
22720 else
22721 x = gen_lowpart (half_mode, operands[2]);
22722 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
22724 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
22725 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
22726 emit_move_insn (low[0], high[0]);
22727 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
22730 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
22731 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
22732 return;
22735 if (operands[1] == constm1_rtx)
22737 /* For -1 << N, we can avoid the shld instruction, because we
22738 know that we're shifting 0...31/63 ones into a -1. */
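/* Illustrative: with both halves preset to -1, shifting ones in from the low
   half would leave the high half unchanged, so the shld is skipped and only
   the low half is shifted; e.g. -1 << 10 on a 32-bit target yields
   high = 0xffffffff, low = 0xfffffc00.  */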
22739 emit_move_insn (low[0], constm1_rtx);
22740 if (optimize_insn_for_size_p ())
22741 emit_move_insn (high[0], low[0]);
22742 else
22743 emit_move_insn (high[0], constm1_rtx);
22745 else
22747 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
22749 if (!rtx_equal_p (operands[0], operands[1]))
22750 emit_move_insn (operands[0], operands[1]);
22752 split_double_mode (mode, operands, 1, low, high);
22753 emit_insn (gen_shld (high[0], low[0], operands[2]));
22756 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
22758 if (TARGET_CMOVE && scratch)
22760 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22761 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22763 ix86_expand_clear (scratch);
22764 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
22766 else
22768 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
22769 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
22771 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
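/* Editorial illustration (not part of GCC): the decomposition that
   ix86_split_ashl performs for a variable double-word left shift on a
   32-bit target, written as plain C.  The function name is hypothetical;
   the real expander emits rtl, not C.  */

static void
example_double_word_ashl (unsigned int *lo, unsigned int *hi, unsigned int n)
{
  const unsigned int half = 32;         /* Width of one word.  */
  unsigned int k = n & (half - 1);      /* x86 masks shift counts to 0..31.  */

  /* shld step: shift the high word left, filling from the low word.  */
  if (k)
    *hi = (*hi << k) | (*lo >> (half - k));
  /* Ordinary shift of the low word.  */
  *lo <<= k;

  /* Adjustment step (x86_shift_adj_1/adj_2 above) for counts >= 32:
     the shifted low word becomes the high word and the low word is cleared.  */
  if (n & half)
    {
      *hi = *lo;
      *lo = 0;
    }
}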
22775 void
22776 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
22778 rtx (*gen_ashr3)(rtx, rtx, rtx)
22779 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
22780 rtx (*gen_shrd)(rtx, rtx, rtx);
22781 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22783 rtx low[2], high[2];
22784 int count;
22786 if (CONST_INT_P (operands[2]))
22788 split_double_mode (mode, operands, 2, low, high);
22789 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22791 if (count == GET_MODE_BITSIZE (mode) - 1)
22793 emit_move_insn (high[0], high[1]);
22794 emit_insn (gen_ashr3 (high[0], high[0],
22795 GEN_INT (half_width - 1)));
22796 emit_move_insn (low[0], high[0]);
22799 else if (count >= half_width)
22801 emit_move_insn (low[0], high[1]);
22802 emit_move_insn (high[0], low[0]);
22803 emit_insn (gen_ashr3 (high[0], high[0],
22804 GEN_INT (half_width - 1)));
22806 if (count > half_width)
22807 emit_insn (gen_ashr3 (low[0], low[0],
22808 GEN_INT (count - half_width)));
22810 else
22812 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22814 if (!rtx_equal_p (operands[0], operands[1]))
22815 emit_move_insn (operands[0], operands[1]);
22817 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
22818 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
22821 else
22823 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22825 if (!rtx_equal_p (operands[0], operands[1]))
22826 emit_move_insn (operands[0], operands[1]);
22828 split_double_mode (mode, operands, 1, low, high);
22830 emit_insn (gen_shrd (low[0], high[0], operands[2]));
22831 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
22833 if (TARGET_CMOVE && scratch)
22835 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22836 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22838 emit_move_insn (scratch, high[0]);
22839 emit_insn (gen_ashr3 (scratch, scratch,
22840 GEN_INT (half_width - 1)));
22841 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
22842 scratch));
22844 else
22846 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
22847 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
22849 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
22854 void
22855 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
22857 rtx (*gen_lshr3)(rtx, rtx, rtx)
22858 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
22859 rtx (*gen_shrd)(rtx, rtx, rtx);
22860 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22862 rtx low[2], high[2];
22863 int count;
22865 if (CONST_INT_P (operands[2]))
22867 split_double_mode (mode, operands, 2, low, high);
22868 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22870 if (count >= half_width)
22872 emit_move_insn (low[0], high[1]);
22873 ix86_expand_clear (high[0]);
22875 if (count > half_width)
22876 emit_insn (gen_lshr3 (low[0], low[0],
22877 GEN_INT (count - half_width)));
22879 else
22881 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22883 if (!rtx_equal_p (operands[0], operands[1]))
22884 emit_move_insn (operands[0], operands[1]);
22886 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
22887 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
22890 else
22892 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22894 if (!rtx_equal_p (operands[0], operands[1]))
22895 emit_move_insn (operands[0], operands[1]);
22897 split_double_mode (mode, operands, 1, low, high);
22899 emit_insn (gen_shrd (low[0], high[0], operands[2]));
22900 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
22902 if (TARGET_CMOVE && scratch)
22904 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22905 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22907 ix86_expand_clear (scratch);
22908 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
22909 scratch));
22911 else
22913 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
22914 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
22916 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
22921 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
22922 static void
22923 predict_jump (int prob)
22925 rtx insn = get_last_insn ();
22926 gcc_assert (JUMP_P (insn));
22927 add_int_reg_note (insn, REG_BR_PROB, prob);
22930 /* Helper function for the string operations below. Test whether VARIABLE
22931 is aligned to VALUE bytes. If true, jump to the label. */
22932 static rtx
22933 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
22935 rtx label = gen_label_rtx ();
22936 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
22937 if (GET_MODE (variable) == DImode)
22938 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
22939 else
22940 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
22941 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
22942 1, label);
22943 if (epilogue)
22944 predict_jump (REG_BR_PROB_BASE * 50 / 100);
22945 else
22946 predict_jump (REG_BR_PROB_BASE * 90 / 100);
22947 return label;
22950 /* Decrease COUNTREG by VALUE. */
22951 static void
22952 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
22954 rtx (*gen_add)(rtx, rtx, rtx)
22955 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
22957 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
22960 /* Zero extend possibly SImode EXP to Pmode register. */
22962 ix86_zero_extend_to_Pmode (rtx exp)
22964 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
22967 /* Divide COUNTREG by SCALE. */
22968 static rtx
22969 scale_counter (rtx countreg, int scale)
22971 rtx sc;
22973 if (scale == 1)
22974 return countreg;
22975 if (CONST_INT_P (countreg))
22976 return GEN_INT (INTVAL (countreg) / scale);
22977 gcc_assert (REG_P (countreg));
22979 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
22980 GEN_INT (exact_log2 (scale)),
22981 NULL, 1, OPTAB_DIRECT);
22982 return sc;
22985 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
22986 DImode for constant loop counts. */
22988 static enum machine_mode
22989 counter_mode (rtx count_exp)
22991 if (GET_MODE (count_exp) != VOIDmode)
22992 return GET_MODE (count_exp);
22993 if (!CONST_INT_P (count_exp))
22994 return Pmode;
22995 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
22996 return DImode;
22997 return SImode;
23000 /* Copy the address to a Pmode register. This is used for x32 to
23001 truncate DImode TLS address to a SImode register. */
23003 static rtx
23004 ix86_copy_addr_to_reg (rtx addr)
23006 if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode)
23007 return copy_addr_to_reg (addr);
23008 else
23010 gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
23011 return gen_rtx_SUBREG (SImode, copy_to_mode_reg (DImode, addr), 0);
23015 /* When ISSETMEM is FALSE, output a simple loop to copy memory pointed to by SRCPTR
23016 to DESTPTR via chunks of MODE unrolled UNROLL times; the overall size is COUNT,
23017 specified in bytes. When ISSETMEM is TRUE, output the equivalent loop to set
23018 memory with VALUE (supposed to be in MODE).
23020 The size is rounded down to a whole number of chunks moved at once.
23021 SRCMEM and DESTMEM provide MEM rtxes to feed proper aliasing info. */
23024 static void
23025 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
23026 rtx destptr, rtx srcptr, rtx value,
23027 rtx count, enum machine_mode mode, int unroll,
23028 int expected_size, bool issetmem)
23030 rtx out_label, top_label, iter, tmp;
23031 enum machine_mode iter_mode = counter_mode (count);
23032 int piece_size_n = GET_MODE_SIZE (mode) * unroll;
23033 rtx piece_size = GEN_INT (piece_size_n);
23034 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
23035 rtx size;
23036 int i;
23038 top_label = gen_label_rtx ();
23039 out_label = gen_label_rtx ();
23040 iter = gen_reg_rtx (iter_mode);
23042 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
23043 NULL, 1, OPTAB_DIRECT);
23044 /* Those two should combine. */
23045 if (piece_size == const1_rtx)
23047 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
23048 true, out_label);
23049 predict_jump (REG_BR_PROB_BASE * 10 / 100);
23051 emit_move_insn (iter, const0_rtx);
23053 emit_label (top_label);
23055 tmp = convert_modes (Pmode, iter_mode, iter, true);
23057 /* This assert could be relaxed - in this case we'll need to compute
23058 the smallest power of two containing PIECE_SIZE_N and pass it to
23059 offset_address. */
23060 gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
23061 destmem = offset_address (destmem, tmp, piece_size_n);
23062 destmem = adjust_address (destmem, mode, 0);
23064 if (!issetmem)
23066 srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
23067 srcmem = adjust_address (srcmem, mode, 0);
23069 /* When unrolling for chips that reorder memory reads and writes,
23070 we can save registers by using a single temporary.
23071 Also, using 4 temporaries is overkill in 32-bit mode. */
23072 if (!TARGET_64BIT && 0)
23074 for (i = 0; i < unroll; i++)
23076 if (i)
23078 destmem =
23079 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23080 srcmem =
23081 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
23083 emit_move_insn (destmem, srcmem);
23086 else
23088 rtx tmpreg[4];
23089 gcc_assert (unroll <= 4);
23090 for (i = 0; i < unroll; i++)
23092 tmpreg[i] = gen_reg_rtx (mode);
23093 if (i)
23095 srcmem =
23096 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
23098 emit_move_insn (tmpreg[i], srcmem);
23100 for (i = 0; i < unroll; i++)
23102 if (i)
23104 destmem =
23105 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23107 emit_move_insn (destmem, tmpreg[i]);
23111 else
23112 for (i = 0; i < unroll; i++)
23114 if (i)
23115 destmem =
23116 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23117 emit_move_insn (destmem, value);
23120 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
23121 true, OPTAB_LIB_WIDEN);
23122 if (tmp != iter)
23123 emit_move_insn (iter, tmp);
23125 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
23126 true, top_label);
23127 if (expected_size != -1)
23129 expected_size /= GET_MODE_SIZE (mode) * unroll;
23130 if (expected_size == 0)
23131 predict_jump (0);
23132 else if (expected_size > REG_BR_PROB_BASE)
23133 predict_jump (REG_BR_PROB_BASE - 1);
23134 else
23135 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
23137 else
23138 predict_jump (REG_BR_PROB_BASE * 80 / 100);
23139 iter = ix86_zero_extend_to_Pmode (iter);
23140 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
23141 true, OPTAB_LIB_WIDEN);
23142 if (tmp != destptr)
23143 emit_move_insn (destptr, tmp);
23144 if (!issetmem)
23146 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
23147 true, OPTAB_LIB_WIDEN);
23148 if (tmp != srcptr)
23149 emit_move_insn (srcptr, tmp);
23151 emit_label (out_label);
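/* Editorial illustration (not part of GCC): the overall shape of the loop
   emitted by expand_set_or_movmem_via_loop, written as byte-level C for the
   copy case.  CHUNK stands for GET_MODE_SIZE (mode) * unroll and must be a
   power of two; the function name is hypothetical.  */

static void
example_copy_loop (unsigned char *dst, const unsigned char *src,
                   unsigned long count, unsigned long chunk)
{
  /* Round the size down to a whole number of chunks; the remaining
     COUNT & (CHUNK - 1) bytes are left for the epilogue code.  */
  unsigned long size = count & ~(chunk - 1);
  unsigned long iter, i;

  for (iter = 0; iter < size; iter += chunk)
    for (i = 0; i < chunk; i++)
      dst[iter + i] = src[iter + i];

  /* The expander then advances the destination and source pointers by
     ITER so the epilogue can finish the tail.  */
}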
23154 /* Output "rep; mov" or "rep; stos" instruction depending on ISSETMEM argument.
23155 When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
23156 When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
23157 For the setmem case, VALUE is ORIG_VALUE promoted to a wider size.
23158 ORIG_VALUE is the original value passed to memset to fill the memory with.
23159 Other arguments have same meaning as for previous function. */
23161 static void
23162 expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem,
23163 rtx destptr, rtx srcptr, rtx value, rtx orig_value,
23164 rtx count,
23165 enum machine_mode mode, bool issetmem)
23167 rtx destexp;
23168 rtx srcexp;
23169 rtx countreg;
23170 HOST_WIDE_INT rounded_count;
23172 /* If possible, it is shorter to use rep movs.
23173 TODO: Maybe it is better to move this logic to decide_alg. */
23174 if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3)
23175 && (!issetmem || orig_value == const0_rtx))
23176 mode = SImode;
23178 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
23179 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
23181 countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
23182 GET_MODE_SIZE (mode)));
23183 if (mode != QImode)
23185 destexp = gen_rtx_ASHIFT (Pmode, countreg,
23186 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23187 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
23189 else
23190 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
23191 if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count))
23193 rounded_count = (INTVAL (count)
23194 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23195 destmem = shallow_copy_rtx (destmem);
23196 set_mem_size (destmem, rounded_count);
23198 else if (MEM_SIZE_KNOWN_P (destmem))
23199 clear_mem_size (destmem);
23201 if (issetmem)
23203 value = force_reg (mode, gen_lowpart (mode, value));
23204 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
23206 else
23208 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
23209 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
23210 if (mode != QImode)
23212 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
23213 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23214 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
23216 else
23217 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
23218 if (CONST_INT_P (count))
23220 rounded_count = (INTVAL (count)
23221 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23222 srcmem = shallow_copy_rtx (srcmem);
23223 set_mem_size (srcmem, rounded_count);
23225 else
23227 if (MEM_SIZE_KNOWN_P (srcmem))
23228 clear_mem_size (srcmem);
23230 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
23231 destexp, srcexp));
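/* Editorial illustration (not part of GCC): the arithmetic behind the rep
   expansion above, in C.  The byte count is scaled down to the number of
   MODE-sized elements (scale_counter), and DESTEXP/SRCEXP describe where the
   pointers end up after the rep instruction; the function name is
   hypothetical.  */

static void
example_rep_operands (unsigned long count_bytes, unsigned long mode_size,
                      unsigned long destptr, unsigned long srcptr,
                      unsigned long *countreg,
                      unsigned long *destexp, unsigned long *srcexp)
{
  *countreg = count_bytes / mode_size;          /* scale_counter.  */
  *destexp = destptr + *countreg * mode_size;   /* Final destination pointer.  */
  *srcexp = srcptr + *countreg * mode_size;     /* Final source pointer.  */
}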
23235 /* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
23236 DESTMEM.
23237 SRCMEM is passed by pointer so it can be updated on return.
23238 The return value is the updated DST. */
23239 static rtx
23240 emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
23241 HOST_WIDE_INT size_to_move)
23243 rtx dst = destmem, src = *srcmem, adjust, tempreg;
23244 enum insn_code code;
23245 enum machine_mode move_mode;
23246 int piece_size, i;
23248 /* Find the widest mode in which we could perform moves.
23249 Start with the biggest power of 2 not exceeding SIZE_TO_MOVE and halve
23250 it until a move of that size is supported. */
23251 piece_size = 1 << floor_log2 (size_to_move);
23252 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23253 code = optab_handler (mov_optab, move_mode);
23254 while (code == CODE_FOR_nothing && piece_size > 1)
23256 piece_size >>= 1;
23257 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23258 code = optab_handler (mov_optab, move_mode);
23261 /* Find the corresponding vector mode with the same size as MOVE_MODE.
23262 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
23263 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
23265 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
23266 move_mode = mode_for_vector (word_mode, nunits);
23267 code = optab_handler (mov_optab, move_mode);
23268 if (code == CODE_FOR_nothing)
23270 move_mode = word_mode;
23271 piece_size = GET_MODE_SIZE (move_mode);
23272 code = optab_handler (mov_optab, move_mode);
23275 gcc_assert (code != CODE_FOR_nothing);
23277 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23278 src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);
23280 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZE moves. */
23281 gcc_assert (size_to_move % piece_size == 0);
23282 adjust = GEN_INT (piece_size);
23283 for (i = 0; i < size_to_move; i += piece_size)
23285 /* We move from memory to memory, so we'll need to do it via
23286 a temporary register. */
23287 tempreg = gen_reg_rtx (move_mode);
23288 emit_insn (GEN_FCN (code) (tempreg, src));
23289 emit_insn (GEN_FCN (code) (dst, tempreg));
23291 emit_move_insn (destptr,
23292 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23293 emit_move_insn (srcptr,
23294 gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));
23296 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23297 piece_size);
23298 src = adjust_automodify_address_nv (src, move_mode, srcptr,
23299 piece_size);
23302 /* Update DST and SRC rtx. */
23303 *srcmem = src;
23304 return dst;
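/* Editorial illustration (not part of GCC): how emit_memmov selects the width
   of the moves it emits.  It starts from the largest power of two not
   exceeding SIZE_TO_MOVE and halves it until the target supports a move of
   that width; the function name and the MOVE_SUPPORTED_P predicate are
   hypothetical stand-ins for the optab query.  */

static int
example_pick_piece_size (int size_to_move, int (*move_supported_p) (int))
{
  int piece = 1;

  /* Largest power of two <= size_to_move (floor_log2 in the code above).  */
  while (piece * 2 <= size_to_move)
    piece *= 2;

  /* Halve until a move of that size is supported.  */
  while (piece > 1 && !move_supported_p (piece))
    piece >>= 1;

  return piece;
}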
23307 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
23308 static void
23309 expand_movmem_epilogue (rtx destmem, rtx srcmem,
23310 rtx destptr, rtx srcptr, rtx count, int max_size)
23312 rtx src, dest;
23313 if (CONST_INT_P (count))
23315 HOST_WIDE_INT countval = INTVAL (count);
23316 HOST_WIDE_INT epilogue_size = countval % max_size;
23317 int i;
23319 /* For now MAX_SIZE should be a power of 2. This assert could be
23320 relaxed, but it would require a somewhat more complicated epilogue
23321 expansion. */
23322 gcc_assert ((max_size & (max_size - 1)) == 0);
23323 for (i = max_size; i >= 1; i >>= 1)
23325 if (epilogue_size & i)
23326 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23328 return;
23330 if (max_size > 8)
23332 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
23333 count, 1, OPTAB_DIRECT);
23334 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
23335 count, QImode, 1, 4, false);
23336 return;
23339 /* When string instructions (movs) are available, we can cheaply advance the dest
23340 and src pointers. Otherwise we save code size by maintaining an offset (zero is
23341 readily available from the preceding rep operation) and using x86 addressing modes. */
23343 if (TARGET_SINGLE_STRINGOP)
23345 if (max_size > 4)
23347 rtx label = ix86_expand_aligntest (count, 4, true);
23348 src = change_address (srcmem, SImode, srcptr);
23349 dest = change_address (destmem, SImode, destptr);
23350 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23351 emit_label (label);
23352 LABEL_NUSES (label) = 1;
23354 if (max_size > 2)
23356 rtx label = ix86_expand_aligntest (count, 2, true);
23357 src = change_address (srcmem, HImode, srcptr);
23358 dest = change_address (destmem, HImode, destptr);
23359 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23360 emit_label (label);
23361 LABEL_NUSES (label) = 1;
23363 if (max_size > 1)
23365 rtx label = ix86_expand_aligntest (count, 1, true);
23366 src = change_address (srcmem, QImode, srcptr);
23367 dest = change_address (destmem, QImode, destptr);
23368 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23369 emit_label (label);
23370 LABEL_NUSES (label) = 1;
23373 else
23375 rtx offset = force_reg (Pmode, const0_rtx);
23376 rtx tmp;
23378 if (max_size > 4)
23380 rtx label = ix86_expand_aligntest (count, 4, true);
23381 src = change_address (srcmem, SImode, srcptr);
23382 dest = change_address (destmem, SImode, destptr);
23383 emit_move_insn (dest, src);
23384 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
23385 true, OPTAB_LIB_WIDEN);
23386 if (tmp != offset)
23387 emit_move_insn (offset, tmp);
23388 emit_label (label);
23389 LABEL_NUSES (label) = 1;
23391 if (max_size > 2)
23393 rtx label = ix86_expand_aligntest (count, 2, true);
23394 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23395 src = change_address (srcmem, HImode, tmp);
23396 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23397 dest = change_address (destmem, HImode, tmp);
23398 emit_move_insn (dest, src);
23399 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
23400 true, OPTAB_LIB_WIDEN);
23401 if (tmp != offset)
23402 emit_move_insn (offset, tmp);
23403 emit_label (label);
23404 LABEL_NUSES (label) = 1;
23406 if (max_size > 1)
23408 rtx label = ix86_expand_aligntest (count, 1, true);
23409 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23410 src = change_address (srcmem, QImode, tmp);
23411 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23412 dest = change_address (destmem, QImode, tmp);
23413 emit_move_insn (dest, src);
23414 emit_label (label);
23415 LABEL_NUSES (label) = 1;
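/* Editorial illustration (not part of GCC): the constant-count epilogue
   above, expressed in C.  Each set bit of COUNT % MAX_SIZE becomes one move
   of that power-of-two size, largest first, so at most log2 (MAX_SIZE) moves
   are emitted and no loop is needed at run time.  The function name is
   hypothetical.  */

static void
example_constant_epilogue (unsigned char *dst, const unsigned char *src,
                           unsigned long count, unsigned long max_size)
{
  unsigned long remaining = count % max_size;   /* MAX_SIZE is a power of two.  */
  unsigned long piece, i;

  for (piece = max_size; piece >= 1; piece >>= 1)
    if (remaining & piece)
      for (i = 0; i < piece; i++)
        *dst++ = *src++;
}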
23420 /* This function emits moves to fill SIZE_TO_MOVE bytes starting from DESTMEM
23421 with value PROMOTED_VAL.
23422 DESTPTR is advanced as the stores are emitted.
23423 The return value is the updated DST. */
23424 static rtx
23425 emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
23426 HOST_WIDE_INT size_to_move)
23428 rtx dst = destmem, adjust;
23429 enum insn_code code;
23430 enum machine_mode move_mode;
23431 int piece_size, i;
23433 /* Find the widest mode in which we could perform moves.
23434 Start with the biggest power of 2 not exceeding SIZE_TO_MOVE and halve
23435 it until a move of that size is supported. */
23436 move_mode = GET_MODE (promoted_val);
23437 if (move_mode == VOIDmode)
23438 move_mode = QImode;
23439 if (size_to_move < GET_MODE_SIZE (move_mode))
23441 move_mode = mode_for_size (size_to_move * BITS_PER_UNIT, MODE_INT, 0);
23442 promoted_val = gen_lowpart (move_mode, promoted_val);
23444 piece_size = GET_MODE_SIZE (move_mode);
23445 code = optab_handler (mov_optab, move_mode);
23446 gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX);
23448 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23450 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZE moves. */
23451 gcc_assert (size_to_move % piece_size == 0);
23452 adjust = GEN_INT (piece_size);
23453 for (i = 0; i < size_to_move; i += piece_size)
23455 if (piece_size <= GET_MODE_SIZE (word_mode))
23457 emit_insn (gen_strset (destptr, dst, promoted_val));
23458 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23459 piece_size);
23460 continue;
23463 emit_insn (GEN_FCN (code) (dst, promoted_val));
23465 emit_move_insn (destptr,
23466 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23468 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23469 piece_size);
23472 /* Update DST rtx. */
23473 return dst;
23475 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
23476 static void
23477 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
23478 rtx count, int max_size)
23480 count =
23481 expand_simple_binop (counter_mode (count), AND, count,
23482 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
23483 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
23484 gen_lowpart (QImode, value), count, QImode,
23485 1, max_size / 2, true);
23488 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
23489 static void
23490 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
23491 rtx count, int max_size)
23493 rtx dest;
23495 if (CONST_INT_P (count))
23497 HOST_WIDE_INT countval = INTVAL (count);
23498 HOST_WIDE_INT epilogue_size = countval % max_size;
23499 int i;
23501 /* For now MAX_SIZE should be a power of 2. This assert could be
23502 relaxed, but it would require a somewhat more complicated epilogue
23503 expansion. */
23504 gcc_assert ((max_size & (max_size - 1)) == 0);
23505 for (i = max_size; i >= 1; i >>= 1)
23507 if (epilogue_size & i)
23509 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23510 destmem = emit_memset (destmem, destptr, vec_value, i);
23511 else
23512 destmem = emit_memset (destmem, destptr, value, i);
23515 return;
23517 if (max_size > 32)
23519 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
23520 return;
23522 if (max_size > 16)
23524 rtx label = ix86_expand_aligntest (count, 16, true);
23525 if (TARGET_64BIT)
23527 dest = change_address (destmem, DImode, destptr);
23528 emit_insn (gen_strset (destptr, dest, value));
23529 dest = adjust_automodify_address_nv (dest, DImode, destptr, 8);
23530 emit_insn (gen_strset (destptr, dest, value));
23532 else
23534 dest = change_address (destmem, SImode, destptr);
23535 emit_insn (gen_strset (destptr, dest, value));
23536 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23537 emit_insn (gen_strset (destptr, dest, value));
23538 dest = adjust_automodify_address_nv (dest, SImode, destptr, 8);
23539 emit_insn (gen_strset (destptr, dest, value));
23540 dest = adjust_automodify_address_nv (dest, SImode, destptr, 12);
23541 emit_insn (gen_strset (destptr, dest, value));
23543 emit_label (label);
23544 LABEL_NUSES (label) = 1;
23546 if (max_size > 8)
23548 rtx label = ix86_expand_aligntest (count, 8, true);
23549 if (TARGET_64BIT)
23551 dest = change_address (destmem, DImode, destptr);
23552 emit_insn (gen_strset (destptr, dest, value));
23554 else
23556 dest = change_address (destmem, SImode, destptr);
23557 emit_insn (gen_strset (destptr, dest, value));
23558 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23559 emit_insn (gen_strset (destptr, dest, value));
23561 emit_label (label);
23562 LABEL_NUSES (label) = 1;
23564 if (max_size > 4)
23566 rtx label = ix86_expand_aligntest (count, 4, true);
23567 dest = change_address (destmem, SImode, destptr);
23568 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
23569 emit_label (label);
23570 LABEL_NUSES (label) = 1;
23572 if (max_size > 2)
23574 rtx label = ix86_expand_aligntest (count, 2, true);
23575 dest = change_address (destmem, HImode, destptr);
23576 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
23577 emit_label (label);
23578 LABEL_NUSES (label) = 1;
23580 if (max_size > 1)
23582 rtx label = ix86_expand_aligntest (count, 1, true);
23583 dest = change_address (destmem, QImode, destptr);
23584 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
23585 emit_label (label);
23586 LABEL_NUSES (label) = 1;
23590 /* Depending on ISSETMEM, copy enough bytes from SRCMEM to DESTMEM, or store enough
23591 bytes into DESTMEM, to align it to DESIRED_ALIGNMENT. The original alignment is ALIGN.
23592 Depending on ISSETMEM, either arguments SRCMEM/SRCPTR or VALUE/VEC_VALUE are
23593 ignored.
23594 The return value is the updated DESTMEM. */
23595 static rtx
23596 expand_set_or_movmem_prologue (rtx destmem, rtx srcmem,
23597 rtx destptr, rtx srcptr, rtx value,
23598 rtx vec_value, rtx count, int align,
23599 int desired_alignment, bool issetmem)
23601 int i;
23602 for (i = 1; i < desired_alignment; i <<= 1)
23604 if (align <= i)
23606 rtx label = ix86_expand_aligntest (destptr, i, false);
23607 if (issetmem)
23609 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23610 destmem = emit_memset (destmem, destptr, vec_value, i);
23611 else
23612 destmem = emit_memset (destmem, destptr, value, i);
23614 else
23615 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23616 ix86_adjust_counter (count, i);
23617 emit_label (label);
23618 LABEL_NUSES (label) = 1;
23619 set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
23622 return destmem;
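/* Editorial illustration (not part of GCC): the alignment prologue above for
   the copy case, as C.  For every power of two below DESIRED_ALIGN that the
   known alignment does not already guarantee, one conditional move of that
   size advances DST (and SRC) until DST is DESIRED_ALIGN-aligned, and COUNT
   is reduced accordingly.  The function name is hypothetical and pointers are
   assumed to fit in unsigned long.  */

static void
example_align_prologue (unsigned char **dst, const unsigned char **src,
                        unsigned long *count, unsigned long align,
                        unsigned long desired_align)
{
  unsigned long i, k;

  for (i = 1; i < desired_align; i <<= 1)
    if (align <= i && ((unsigned long) *dst & i))
      {
        for (k = 0; k < i; k++)
          *(*dst)++ = *(*src)++;
        *count -= i;
      }
}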
23625 /* Test if COUNT & SIZE is nonzero and if so, expand a movmem
23626 or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
23627 and jump to DONE_LABEL. */
23628 static void
23629 expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
23630 rtx destptr, rtx srcptr,
23631 rtx value, rtx vec_value,
23632 rtx count, int size,
23633 rtx done_label, bool issetmem)
23635 rtx label = ix86_expand_aligntest (count, size, false);
23636 enum machine_mode mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 1);
23637 rtx modesize;
23638 int n;
23640 /* If we do not have a vector value to copy, we must reduce the size. */
23641 if (issetmem)
23643 if (!vec_value)
23645 if (GET_MODE (value) == VOIDmode && size > 8)
23646 mode = Pmode;
23647 else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value)))
23648 mode = GET_MODE (value);
23650 else
23651 mode = GET_MODE (vec_value), value = vec_value;
23653 else
23655 /* Choose appropriate vector mode. */
23656 if (size >= 32)
23657 mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
23658 else if (size >= 16)
23659 mode = TARGET_SSE ? V16QImode : DImode;
23660 srcmem = change_address (srcmem, mode, srcptr);
23662 destmem = change_address (destmem, mode, destptr);
23663 modesize = GEN_INT (GET_MODE_SIZE (mode));
23664 gcc_assert (GET_MODE_SIZE (mode) <= size);
23665 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
23667 if (issetmem)
23668 emit_move_insn (destmem, gen_lowpart (mode, value));
23669 else
23671 emit_move_insn (destmem, srcmem);
23672 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23674 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23677 destmem = offset_address (destmem, count, 1);
23678 destmem = offset_address (destmem, GEN_INT (-2 * size),
23679 GET_MODE_SIZE (mode));
23680 if (!issetmem)
23682 srcmem = offset_address (srcmem, count, 1);
23683 srcmem = offset_address (srcmem, GEN_INT (-2 * size),
23684 GET_MODE_SIZE (mode));
23686 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
23688 if (issetmem)
23689 emit_move_insn (destmem, gen_lowpart (mode, value));
23690 else
23692 emit_move_insn (destmem, srcmem);
23693 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23695 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23697 emit_jump_insn (gen_jump (done_label));
23698 emit_barrier ();
23700 emit_label (label);
23701 LABEL_NUSES (label) = 1;
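/* Editorial illustration (not part of GCC): the trick used above for blocks
   of SIZE .. 2*SIZE-1 bytes.  Copy SIZE bytes from the start of the block and
   SIZE bytes ending at its end; the two moves may overlap, but together they
   cover every byte, so no length-dependent loop is needed.  The function name
   is hypothetical.  */

static void
example_overlapping_copy (unsigned char *dst, const unsigned char *src,
                          unsigned long count, unsigned long size)
{
  unsigned long i;

  /* Precondition established by the caller: size <= count < 2 * size.  */
  for (i = 0; i < size; i++)
    dst[i] = src[i];
  for (i = 0; i < size; i++)
    dst[count - size + i] = src[count - size + i];
}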
23704 /* Handle a small memcpy (up to SIZE, which is supposed to be a small power of 2)
23705 and get ready for the main memcpy loop by copying the initial DESIRED_ALIGN-ALIGN
23706 bytes and the last SIZE bytes, adjusting DESTPTR/SRCPTR/COUNT so that we can
23707 proceed with a loop copying SIZE bytes at once. Do moves in MODE.
23708 DONE_LABEL is a label after the whole copying sequence. The label is created
23709 on demand if *DONE_LABEL is NULL.
23710 MIN_SIZE is the minimal size of block copied. This value gets adjusted for new
23711 bounds after the initial copies.
23713 DESTMEM/SRCMEM are memory expressions pointing to the copied block,
23714 DESTPTR/SRCPTR are pointers to the block. DYNAMIC_CHECK indicates whether
23715 we will dispatch to a library call for large blocks.
23717 In pseudocode we do:
23719 if (COUNT < SIZE)
23721 Assume that SIZE is 4. Bigger sizes are handled analogously
23722 if (COUNT & 4)
23724 copy 4 bytes from SRCPTR to DESTPTR
23725 copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
23726 goto done_label
23728 if (!COUNT)
23729 goto done_label;
23730 copy 1 byte from SRCPTR to DESTPTR
23731 if (COUNT & 2)
23733 copy 2 bytes from SRCPTR to DESTPTR
23734 copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
23737 else
23739 copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
23740 copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT - SIZE
23742 OLD_DESTPTR = DESTPTR;
23743 Align DESTPTR up to DESIRED_ALIGN
23744 SRCPTR += DESTPTR - OLD_DESTPTR
23745 COUNT -= DESTPTR - OLD_DESTPTR
23746 if (DYNAMIC_CHECK)
23747 Round COUNT down to multiple of SIZE
23748 << optional caller supplied zero size guard is here >>
23749 << optional caller supplied dynamic check is here >>
23750 << caller supplied main copy loop is here >>
23752 done_label:
23754 static void
23755 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem,
23756 rtx *destptr, rtx *srcptr,
23757 enum machine_mode mode,
23758 rtx value, rtx vec_value,
23759 rtx *count,
23760 rtx *done_label,
23761 int size,
23762 int desired_align,
23763 int align,
23764 unsigned HOST_WIDE_INT *min_size,
23765 bool dynamic_check,
23766 bool issetmem)
23768 rtx loop_label = NULL, label;
23769 int n;
23770 rtx modesize;
23771 int prolog_size = 0;
23772 rtx mode_value;
23774 /* Choose the proper value to copy. */
23775 if (issetmem && VECTOR_MODE_P (mode))
23776 mode_value = vec_value;
23777 else
23778 mode_value = value;
23779 gcc_assert (GET_MODE_SIZE (mode) <= size);
23781 /* See if block is big or small, handle small blocks. */
23782 if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size)
23784 int size2 = size;
23785 loop_label = gen_label_rtx ();
23787 if (!*done_label)
23788 *done_label = gen_label_rtx ();
23790 emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count),
23791 1, loop_label);
23792 size2 >>= 1;
23794 /* Handle sizes > 3. */
23795 for (;size2 > 2; size2 >>= 1)
23796 expand_small_movmem_or_setmem (destmem, srcmem,
23797 *destptr, *srcptr,
23798 value, vec_value,
23799 *count,
23800 size2, *done_label, issetmem);
23801 /* Nothing to copy? Jump to DONE_LABEL if so */
23802 emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count),
23803 1, *done_label);
23805 /* Do a byte copy. */
23806 destmem = change_address (destmem, QImode, *destptr);
23807 if (issetmem)
23808 emit_move_insn (destmem, gen_lowpart (QImode, value));
23809 else
23811 srcmem = change_address (srcmem, QImode, *srcptr);
23812 emit_move_insn (destmem, srcmem);
23815 /* Handle sizes 2 and 3. */
23816 label = ix86_expand_aligntest (*count, 2, false);
23817 destmem = change_address (destmem, HImode, *destptr);
23818 destmem = offset_address (destmem, *count, 1);
23819 destmem = offset_address (destmem, GEN_INT (-2), 2);
23820 if (issetmem)
23821 emit_move_insn (destmem, gen_lowpart (HImode, value));
23822 else
23824 srcmem = change_address (srcmem, HImode, *srcptr);
23825 srcmem = offset_address (srcmem, *count, 1);
23826 srcmem = offset_address (srcmem, GEN_INT (-2), 2);
23827 emit_move_insn (destmem, srcmem);
23830 emit_label (label);
23831 LABEL_NUSES (label) = 1;
23832 emit_jump_insn (gen_jump (*done_label));
23833 emit_barrier ();
23835 else
23836 gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size
23837 || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size);
23839 /* Start memcpy for COUNT >= SIZE. */
23840 if (loop_label)
23842 emit_label (loop_label);
23843 LABEL_NUSES (loop_label) = 1;
23846 /* Copy first desired_align bytes. */
23847 if (!issetmem)
23848 srcmem = change_address (srcmem, mode, *srcptr);
23849 destmem = change_address (destmem, mode, *destptr);
23850 modesize = GEN_INT (GET_MODE_SIZE (mode));
23851 for (n = 0; prolog_size < desired_align - align; n++)
23853 if (issetmem)
23854 emit_move_insn (destmem, mode_value);
23855 else
23857 emit_move_insn (destmem, srcmem);
23858 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23860 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23861 prolog_size += GET_MODE_SIZE (mode);
23865 /* Copy last SIZE bytes. */
23866 destmem = offset_address (destmem, *count, 1);
23867 destmem = offset_address (destmem,
23868 GEN_INT (-size - prolog_size),
23870 if (issetmem)
23871 emit_move_insn (destmem, mode_value);
23872 else
23874 srcmem = offset_address (srcmem, *count, 1);
23875 srcmem = offset_address (srcmem,
23876 GEN_INT (-size - prolog_size),
23878 emit_move_insn (destmem, srcmem);
23880 for (n = 1; n * GET_MODE_SIZE (mode) < size; n++)
23882 destmem = offset_address (destmem, modesize, 1);
23883 if (issetmem)
23884 emit_move_insn (destmem, mode_value);
23885 else
23887 srcmem = offset_address (srcmem, modesize, 1);
23888 emit_move_insn (destmem, srcmem);
23892 /* Align destination. */
23893 if (desired_align > 1 && desired_align > align)
23895 rtx saveddest = *destptr;
23897 gcc_assert (desired_align <= size);
23898 /* Align destptr up, place it to new register. */
23899 *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
23900 GEN_INT (prolog_size),
23901 NULL_RTX, 1, OPTAB_DIRECT);
23902 *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
23903 GEN_INT (-desired_align),
23904 *destptr, 1, OPTAB_DIRECT);
23905 /* See how many bytes we skipped. */
23906 saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
23907 *destptr,
23908 saveddest, 1, OPTAB_DIRECT);
23909 /* Adjust srcptr and count. */
23910 if (!issetmem)
23911 *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr, saveddest,
23912 *srcptr, 1, OPTAB_DIRECT);
23913 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
23914 saveddest, *count, 1, OPTAB_DIRECT);
23915 /* We copied at most size + prolog_size. */
23916 if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size))
23917 *min_size = (*min_size - size) & ~(unsigned HOST_WIDE_INT)(size - 1);
23918 else
23919 *min_size = 0;
23921 /* Our loops always round down the block size, but for dispatch to a library
23922 call we need the precise value. */
23923 if (dynamic_check)
23924 *count = expand_simple_binop (GET_MODE (*count), AND, *count,
23925 GEN_INT (-size), *count, 1, OPTAB_DIRECT);
23927 else
23929 gcc_assert (prolog_size == 0);
23930 /* Decrease count, so we won't end up copying last word twice. */
23931 if (!CONST_INT_P (*count))
23932 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
23933 constm1_rtx, *count, 1, OPTAB_DIRECT);
23934 else
23935 *count = GEN_INT ((UINTVAL (*count) - 1) & ~(unsigned HOST_WIDE_INT)(size - 1));
23936 if (*min_size)
23937 *min_size = (*min_size - 1) & ~(unsigned HOST_WIDE_INT)(size - 1);
23942 /* This function is like the previous one, except here we know how many bytes
23943 need to be copied. That allows us to update alignment not only of DST, which
23944 is returned, but also of SRC, which is passed as a pointer for that
23945 reason. */
23946 static rtx
23947 expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
23948 rtx srcreg, rtx value, rtx vec_value,
23949 int desired_align, int align_bytes,
23950 bool issetmem)
23952 rtx src = NULL;
23953 rtx orig_dst = dst;
23954 rtx orig_src = NULL;
23955 int piece_size = 1;
23956 int copied_bytes = 0;
23958 if (!issetmem)
23960 gcc_assert (srcp != NULL);
23961 src = *srcp;
23962 orig_src = src;
23965 for (piece_size = 1;
23966 piece_size <= desired_align && copied_bytes < align_bytes;
23967 piece_size <<= 1)
23969 if (align_bytes & piece_size)
23971 if (issetmem)
23973 if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value)))
23974 dst = emit_memset (dst, destreg, vec_value, piece_size);
23975 else
23976 dst = emit_memset (dst, destreg, value, piece_size);
23978 else
23979 dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
23980 copied_bytes += piece_size;
23983 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
23984 set_mem_align (dst, desired_align * BITS_PER_UNIT);
23985 if (MEM_SIZE_KNOWN_P (orig_dst))
23986 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
23988 if (!issetmem)
23990 int src_align_bytes = get_mem_align_offset (src, desired_align
23991 * BITS_PER_UNIT);
23992 if (src_align_bytes >= 0)
23993 src_align_bytes = desired_align - src_align_bytes;
23994 if (src_align_bytes >= 0)
23996 unsigned int src_align;
23997 for (src_align = desired_align; src_align >= 2; src_align >>= 1)
23999 if ((src_align_bytes & (src_align - 1))
24000 == (align_bytes & (src_align - 1)))
24001 break;
24003 if (src_align > (unsigned int) desired_align)
24004 src_align = desired_align;
24005 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
24006 set_mem_align (src, src_align * BITS_PER_UNIT);
24008 if (MEM_SIZE_KNOWN_P (orig_src))
24009 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
24010 *srcp = src;
24013 return dst;
24016 /* Return true if ALG can be used in current context.
24017 Assume we expand memset if MEMSET is true. */
24018 static bool
24019 alg_usable_p (enum stringop_alg alg, bool memset)
24021 if (alg == no_stringop)
24022 return false;
24023 if (alg == vector_loop)
24024 return TARGET_SSE || TARGET_AVX;
24025 /* Algorithms using the rep prefix want at least edi and ecx;
24026 additionally, memset wants eax and memcpy wants esi. Don't
24027 consider such algorithms if the user has appropriated those
24028 registers for their own purposes. */
24029 if (alg == rep_prefix_1_byte
24030 || alg == rep_prefix_4_byte
24031 || alg == rep_prefix_8_byte)
24032 return !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
24033 || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
24034 return true;
24037 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
24038 static enum stringop_alg
24039 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
24040 unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
24041 bool memset, bool zero_memset, int *dynamic_check, bool *noalign)
24043 const struct stringop_algs * algs;
24044 bool optimize_for_speed;
24045 int max = -1;
24046 const struct processor_costs *cost;
24047 int i;
24048 bool any_alg_usable_p = false;
24050 *noalign = false;
24051 *dynamic_check = -1;
24053 /* Even if the string operation call is cold, we still might spend a lot
24054 of time processing large blocks. */
24055 if (optimize_function_for_size_p (cfun)
24056 || (optimize_insn_for_size_p ()
24057 && (max_size < 256
24058 || (expected_size != -1 && expected_size < 256))))
24059 optimize_for_speed = false;
24060 else
24061 optimize_for_speed = true;
24063 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
24064 if (memset)
24065 algs = &cost->memset[TARGET_64BIT != 0];
24066 else
24067 algs = &cost->memcpy[TARGET_64BIT != 0];
24069 /* See maximal size for user defined algorithm. */
24070 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
24072 enum stringop_alg candidate = algs->size[i].alg;
24073 bool usable = alg_usable_p (candidate, memset);
24074 any_alg_usable_p |= usable;
24076 if (candidate != libcall && candidate && usable)
24077 max = algs->size[i].max;
24080 /* If the expected size is not known but the max size is small enough
24081 that the inline version is a win, set the expected size into
24082 the range. */
24083 if (max > 1 && (unsigned HOST_WIDE_INT) max >= max_size
24084 && expected_size == -1)
24085 expected_size = min_size / 2 + max_size / 2;
24087 /* If the user specified the algorithm, honor it if possible. */
24088 if (ix86_stringop_alg != no_stringop
24089 && alg_usable_p (ix86_stringop_alg, memset))
24090 return ix86_stringop_alg;
24091 /* rep; movq or rep; movl is the smallest variant. */
24092 else if (!optimize_for_speed)
24094 *noalign = true;
24095 if (!count || (count & 3) || (memset && !zero_memset))
24096 return alg_usable_p (rep_prefix_1_byte, memset)
24097 ? rep_prefix_1_byte : loop_1_byte;
24098 else
24099 return alg_usable_p (rep_prefix_4_byte, memset)
24100 ? rep_prefix_4_byte : loop;
24102 /* Very tiny blocks are best handled via the loop; REP is expensive to
24103 set up. */
24104 else if (expected_size != -1 && expected_size < 4)
24105 return loop_1_byte;
24106 else if (expected_size != -1)
24108 enum stringop_alg alg = libcall;
24109 bool alg_noalign = false;
24110 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
24112 /* We get here if the algorithms that were not libcall-based
24113 were rep-prefix based and we are unable to use rep prefixes
24114 based on global register usage. Break out of the loop and
24115 use the heuristic below. */
24116 if (algs->size[i].max == 0)
24117 break;
24118 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
24120 enum stringop_alg candidate = algs->size[i].alg;
24122 if (candidate != libcall && alg_usable_p (candidate, memset))
24124 alg = candidate;
24125 alg_noalign = algs->size[i].noalign;
24127 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
24128 last non-libcall inline algorithm. */
24129 if (TARGET_INLINE_ALL_STRINGOPS)
24131 /* When the current size is best to be copied by a libcall,
24132 but we are still forced to inline, run the heuristic below
24133 that will pick code for medium sized blocks. */
24134 if (alg != libcall)
24136 *noalign = alg_noalign;
24137 return alg;
24139 break;
24141 else if (alg_usable_p (candidate, memset))
24143 *noalign = algs->size[i].noalign;
24144 return candidate;
24149 /* When asked to inline the call anyway, try to pick a meaningful choice.
24150 We look for the maximal size of block that is faster to copy by hand and
24151 take blocks of at most that size, guessing that the average size will
24152 be roughly half of the maximum.
24154 If this turns out to be bad, we might simply specify the preferred
24155 choice in ix86_costs. */
24156 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24157 && (algs->unknown_size == libcall
24158 || !alg_usable_p (algs->unknown_size, memset)))
24160 enum stringop_alg alg;
24162 /* If there aren't any usable algorithms, then recursing on
24163 smaller sizes isn't going to find anything. Just return the
24164 simple byte-at-a-time copy loop. */
24165 if (!any_alg_usable_p)
24167 /* Pick something reasonable. */
24168 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24169 *dynamic_check = 128;
24170 return loop_1_byte;
24172 if (max == -1)
24173 max = 4096;
24174 alg = decide_alg (count, max / 2, min_size, max_size, memset,
24175 zero_memset, dynamic_check, noalign);
24176 gcc_assert (*dynamic_check == -1);
24177 gcc_assert (alg != libcall);
24178 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24179 *dynamic_check = max;
24180 return alg;
24182 return (alg_usable_p (algs->unknown_size, memset)
24183 ? algs->unknown_size : libcall);
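/* Editorial illustration (not part of GCC): the core table lookup that
   decide_alg performs when the expected size is known.  The struct below is a
   simplified stand-in for the stringop_algs cost-table entries, not GCC's
   real type; the walk returns the first entry whose MAX bound covers the
   expected size, with -1 meaning "no upper bound".  */

struct example_stringop_entry
{
  long max;     /* Largest block size this algorithm should handle.  */
  int alg;      /* Algorithm identifier.  */
};

static int
example_pick_alg (const struct example_stringop_entry *table, int n,
                  long expected_size, int fallback_alg)
{
  int i;

  for (i = 0; i < n; i++)
    if (table[i].max == -1 || table[i].max >= expected_size)
      return table[i].alg;

  return fallback_alg;
}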
24186 /* Decide on alignment. We know that the operand is already aligned to ALIGN
24187 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
24188 static int
24189 decide_alignment (int align,
24190 enum stringop_alg alg,
24191 int expected_size,
24192 enum machine_mode move_mode)
24194 int desired_align = 0;
24196 gcc_assert (alg != no_stringop);
24198 if (alg == libcall)
24199 return 0;
24200 if (move_mode == VOIDmode)
24201 return 0;
24203 desired_align = GET_MODE_SIZE (move_mode);
24204 /* PentiumPro has special logic triggering for 8 byte aligned blocks,
24205 copying the whole cache line at once. */
24206 if (TARGET_PENTIUMPRO
24207 && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
24208 desired_align = 8;
24210 if (optimize_size)
24211 desired_align = 1;
24212 if (desired_align < align)
24213 desired_align = align;
24214 if (expected_size != -1 && expected_size < 4)
24215 desired_align = align;
24217 return desired_align;
24221 /* Helper function for memset. For the QImode value 0xXY produce
24222 0xXYXYXYXY of the width specified by MODE. This is essentially
24223 a * 0x10101010, but we can do slightly better than
24224 synth_mult by unwinding the sequence by hand on CPUs with
24225 slow multiply. */
24226 static rtx
24227 promote_duplicated_reg (enum machine_mode mode, rtx val)
24229 enum machine_mode valmode = GET_MODE (val);
24230 rtx tmp;
24231 int nops = mode == DImode ? 3 : 2;
24233 gcc_assert (mode == SImode || mode == DImode || val == const0_rtx);
24234 if (val == const0_rtx)
24235 return copy_to_mode_reg (mode, CONST0_RTX (mode));
24236 if (CONST_INT_P (val))
24238 HOST_WIDE_INT v = INTVAL (val) & 255;
24240 v |= v << 8;
24241 v |= v << 16;
24242 if (mode == DImode)
24243 v |= (v << 16) << 16;
24244 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
24247 if (valmode == VOIDmode)
24248 valmode = QImode;
24249 if (valmode != QImode)
24250 val = gen_lowpart (QImode, val);
24251 if (mode == QImode)
24252 return val;
24253 if (!TARGET_PARTIAL_REG_STALL)
24254 nops--;
24255 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
24256 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
24257 <= (ix86_cost->shift_const + ix86_cost->add) * nops
24258 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
24260 rtx reg = convert_modes (mode, QImode, val, true);
24261 tmp = promote_duplicated_reg (mode, const1_rtx);
24262 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
24263 OPTAB_DIRECT);
24265 else
24267 rtx reg = convert_modes (mode, QImode, val, true);
24269 if (!TARGET_PARTIAL_REG_STALL)
24270 if (mode == SImode)
24271 emit_insn (gen_movsi_insv_1 (reg, reg));
24272 else
24273 emit_insn (gen_movdi_insv_1 (reg, reg));
24274 else
24276 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
24277 NULL, 1, OPTAB_DIRECT);
24278 reg =
24279 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24281 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
24282 NULL, 1, OPTAB_DIRECT);
24283 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24284 if (mode == SImode)
24285 return reg;
24286 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
24287 NULL, 1, OPTAB_DIRECT);
24288 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24289 return reg;
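/* Editorial illustration (not part of GCC): the two ways promote_duplicated_reg
   can replicate a byte across a word, written as C on a 64-bit value.  The
   cost comparison above chooses between a single multiply by
   0x0101010101010101 and the shift/or unwinding shown second; the function
   names are hypothetical.  */

static unsigned long long
example_promote_by_mult (unsigned char v)
{
  return (unsigned long long) v * 0x0101010101010101ULL;
}

static unsigned long long
example_promote_by_shifts (unsigned char v)
{
  unsigned long long r = v;

  r |= r << 8;          /* 0x00XY -> 0xXYXY.  */
  r |= r << 16;         /* -> 0xXYXYXYXY.  */
  r |= r << 32;         /* -> 0xXYXYXYXYXYXYXYXY.  */
  return r;
}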
24293 /* Duplicate value VAL using promote_duplicated_reg into the maximal size that will
24294 be needed by the main loop copying SIZE_NEEDED chunks and by the prologue raising
24295 the alignment from ALIGN to DESIRED_ALIGN. */
24296 static rtx
24297 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
24298 int align)
24300 rtx promoted_val;
24302 if (TARGET_64BIT
24303 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
24304 promoted_val = promote_duplicated_reg (DImode, val);
24305 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
24306 promoted_val = promote_duplicated_reg (SImode, val);
24307 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
24308 promoted_val = promote_duplicated_reg (HImode, val);
24309 else
24310 promoted_val = val;
24312 return promoted_val;
24315 /* Expand a string move (memcpy) or store (memset) operation. Use i386 string
24316 operations when profitable. The code depends upon architecture, block size
24317 and alignment, but always has one of the following overall structures:
24319 Aligned move sequence:
24321 1) Prologue guard: Conditional that jumps up to epilogues for small
24322 blocks that can be handled by epilogue alone. This is faster
24323 but also needed for correctness, since the prologue assumes the block
24324 is larger than the desired alignment.
24326 Optional dynamic check for size and libcall for large
24327 blocks is emitted here too, with -minline-stringops-dynamically.
24329 2) Prologue: copy first few bytes in order to get destination
24330 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
24331 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
24332 copied. We emit either a jump tree on power of two sized
24333 blocks, or a byte loop.
24335 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24336 with specified algorithm.
24338 4) Epilogue: code copying tail of the block that is too small to be
24339 handled by main body (or up to size guarded by prologue guard).
24341 Misaligned move sequence
24343 1) misaligned move prologue/epilogue containing:
24344 a) Prologue handling small memory blocks and jumping to done_label
24345 (skipped if blocks are known to be large enough)
24346 b) Single move copying the first DESIRED_ALIGN-ALIGN bytes if alignment is
24347 needed by single possibly misaligned move
24348 (skipped if alignment is not needed)
24349 c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves
24351 2) Zero size guard dispatching to done_label, if needed
24353 3) dispatch to library call, if needed,
24355 4) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24356 with specified algorithm. */
24357 bool
24358 ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
24359 rtx align_exp, rtx expected_align_exp,
24360 rtx expected_size_exp, rtx min_size_exp,
24361 rtx max_size_exp, rtx probable_max_size_exp,
24362 bool issetmem)
24364 rtx destreg;
24365 rtx srcreg = NULL;
24366 rtx label = NULL;
24367 rtx tmp;
24368 rtx jump_around_label = NULL;
24369 HOST_WIDE_INT align = 1;
24370 unsigned HOST_WIDE_INT count = 0;
24371 HOST_WIDE_INT expected_size = -1;
24372 int size_needed = 0, epilogue_size_needed;
24373 int desired_align = 0, align_bytes = 0;
24374 enum stringop_alg alg;
24375 rtx promoted_val = NULL;
24376 rtx vec_promoted_val = NULL;
24377 bool force_loopy_epilogue = false;
24378 int dynamic_check;
24379 bool need_zero_guard = false;
24380 bool noalign;
24381 enum machine_mode move_mode = VOIDmode;
24382 int unroll_factor = 1;
24383 /* TODO: Once value ranges are available, fill in proper data. */
24384 unsigned HOST_WIDE_INT min_size = 0;
24385 unsigned HOST_WIDE_INT max_size = -1;
24386 unsigned HOST_WIDE_INT probable_max_size = -1;
24387 bool misaligned_prologue_used = false;
24389 if (CONST_INT_P (align_exp))
24390 align = INTVAL (align_exp);
24391 /* i386 can do misaligned access at a reasonably increased cost. */
24392 if (CONST_INT_P (expected_align_exp)
24393 && INTVAL (expected_align_exp) > align)
24394 align = INTVAL (expected_align_exp);
24395 /* ALIGN is the minimum of destination and source alignment, but we care here
24396 just about destination alignment. */
24397 else if (!issetmem
24398 && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
24399 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
24401 if (CONST_INT_P (count_exp))
24402 min_size = max_size = probable_max_size = count = expected_size
24403 = INTVAL (count_exp);
24404 else
24406 if (min_size_exp)
24407 min_size = INTVAL (min_size_exp);
24408 if (max_size_exp)
24409 max_size = INTVAL (max_size_exp);
24410 if (probable_max_size_exp)
24411 probable_max_size = INTVAL (probable_max_size_exp);
24412 if (CONST_INT_P (expected_size_exp) && count == 0)
24413 expected_size = INTVAL (expected_size_exp);
24416 /* Make sure we don't need to care about overflow later on. */
24417 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
24418 return false;
24420 /* Step 0: Decide on preferred algorithm, desired alignment and
24421 size of chunks to be copied by main loop. */
24422 alg = decide_alg (count, expected_size, min_size, probable_max_size,
24423 issetmem,
24424 issetmem && val_exp == const0_rtx,
24425 &dynamic_check, &noalign);
24426 if (alg == libcall)
24427 return false;
24428 gcc_assert (alg != no_stringop);
24430 /* For now the vector version of memset is generated only for memory zeroing, as
24431 creating the promoted vector value is very cheap in this case. */
24432 if (issetmem && alg == vector_loop && val_exp != const0_rtx)
24433 alg = unrolled_loop;
24435 if (!count)
24436 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
24437 destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
24438 if (!issetmem)
24439 srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
24441 unroll_factor = 1;
24442 move_mode = word_mode;
24443 switch (alg)
24445 case libcall:
24446 case no_stringop:
24447 case last_alg:
24448 gcc_unreachable ();
24449 case loop_1_byte:
24450 need_zero_guard = true;
24451 move_mode = QImode;
24452 break;
24453 case loop:
24454 need_zero_guard = true;
24455 break;
24456 case unrolled_loop:
24457 need_zero_guard = true;
24458 unroll_factor = (TARGET_64BIT ? 4 : 2);
24459 break;
24460 case vector_loop:
24461 need_zero_guard = true;
24462 unroll_factor = 4;
24463 /* Find the widest supported mode. */
24464 move_mode = word_mode;
24465 while (optab_handler (mov_optab, GET_MODE_WIDER_MODE (move_mode))
24466 != CODE_FOR_nothing)
24467 move_mode = GET_MODE_WIDER_MODE (move_mode);
24469 /* Find the corresponding vector mode with the same size as MOVE_MODE.
24470 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
24471 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
24473 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
24474 move_mode = mode_for_vector (word_mode, nunits);
24475 if (optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
24476 move_mode = word_mode;
24478 gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
24479 break;
24480 case rep_prefix_8_byte:
24481 move_mode = DImode;
24482 break;
24483 case rep_prefix_4_byte:
24484 move_mode = SImode;
24485 break;
24486 case rep_prefix_1_byte:
24487 move_mode = QImode;
24488 break;
24490 size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
24491 epilogue_size_needed = size_needed;
24493 desired_align = decide_alignment (align, alg, expected_size, move_mode);
24494 if (!TARGET_ALIGN_STRINGOPS || noalign)
24495 align = desired_align;
24497 /* Step 1: Prologue guard. */
24499 /* Alignment code needs count to be in register. */
24500 if (CONST_INT_P (count_exp) && desired_align > align)
24502 if (INTVAL (count_exp) > desired_align
24503 && INTVAL (count_exp) > size_needed)
24505 align_bytes
24506 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
24507 if (align_bytes <= 0)
24508 align_bytes = 0;
24509 else
24510 align_bytes = desired_align - align_bytes;
24512 if (align_bytes == 0)
24513 count_exp = force_reg (counter_mode (count_exp), count_exp);
24515 gcc_assert (desired_align >= 1 && align >= 1);
24517 /* Misaligned move sequences handle both prologue and epilogue at once.
24518 Default code generation results in smaller code for large alignments
24519 and also avoids redundant work when sizes are known precisely. */
24520 misaligned_prologue_used
24521 = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
24522 && MAX (desired_align, epilogue_size_needed) <= 32
24523 && desired_align <= epilogue_size_needed
24524 && ((desired_align > align && !align_bytes)
24525 || (!count && epilogue_size_needed > 1)));
24527 /* Do the cheap promotion to allow better CSE across the
24528 main loop and epilogue (i.e. one load of the big constant at the
24529 front of all the code).
24530 For now the misaligned move sequences do not have a fast path
24531 without broadcasting. */
24532 if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
24534 if (alg == vector_loop)
24536 gcc_assert (val_exp == const0_rtx);
24537 vec_promoted_val = promote_duplicated_reg (move_mode, val_exp);
24538 promoted_val = promote_duplicated_reg_to_size (val_exp,
24539 GET_MODE_SIZE (word_mode),
24540 desired_align, align);
24542 else
24544 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24545 desired_align, align);
24548 /* Misaligned move sequences handle both prologues and epilogues at once.
24549 Default code generation results in smaller code for large alignments and
24550 also avoids redundant work when sizes are known precisely. */
24551 if (misaligned_prologue_used)
24553 /* Misaligned move prologue handles small blocks by itself. */
24554 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
24555 (dst, src, &destreg, &srcreg,
24556 move_mode, promoted_val, vec_promoted_val,
24557 &count_exp,
24558 &jump_around_label,
24559 desired_align < align
24560 ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed,
24561 desired_align, align, &min_size, dynamic_check, issetmem);
24562 if (!issetmem)
24563 src = change_address (src, BLKmode, srcreg);
24564 dst = change_address (dst, BLKmode, destreg);
24565 set_mem_align (dst, desired_align * BITS_PER_UNIT);
24566 epilogue_size_needed = 0;
24567 if (need_zero_guard && !min_size)
24569 /* It is possible that we copied enough so the main loop will not
24570 execute. */
24571 gcc_assert (size_needed > 1);
24572 if (jump_around_label == NULL_RTX)
24573 jump_around_label = gen_label_rtx ();
24574 emit_cmp_and_jump_insns (count_exp,
24575 GEN_INT (size_needed),
24576 LTU, 0, counter_mode (count_exp), 1, jump_around_label);
24577 if (expected_size == -1
24578 || expected_size < (desired_align - align) / 2 + size_needed)
24579 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24580 else
24581 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24584 /* Ensure that alignment prologue won't copy past end of block. */
24585 else if (size_needed > 1 || (desired_align > 1 && desired_align > align))
24587 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
24588 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
24589 Make sure it is a power of 2. */
24590 epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);
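/* For example, with SIZE_NEEDED == 16 and DESIRED_ALIGN - ALIGN <= 15,
   EPILOGUE_SIZE_NEEDED is 15 after the MAX above and is rounded up here
   to 16: floor_log2 (15) == 3, so 1 << (3 + 1) == 16.  */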
24592 /* To improve performance of small blocks, we jump around the VAL
24593 promotion. This means that if the promoted VAL is not constant,
24594 we might not use it in the epilogue and have to use the byte
24595 loop variant. */
24596 if (issetmem && epilogue_size_needed > 2 && !promoted_val)
24597 force_loopy_epilogue = true;
24598 if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24599 || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24601 /* If main algorithm works on QImode, no epilogue is needed.
24602 For small sizes just don't align anything. */
24603 if (size_needed == 1)
24604 desired_align = align;
24605 else
24606 goto epilogue;
24608 else if (!count
24609 && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24611 label = gen_label_rtx ();
24612 emit_cmp_and_jump_insns (count_exp,
24613 GEN_INT (epilogue_size_needed),
24614 LTU, 0, counter_mode (count_exp), 1, label);
24615 if (expected_size == -1 || expected_size < epilogue_size_needed)
24616 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24617 else
24618 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24622 /* Emit code to decide at runtime whether a library call or inline code
24623 should be used. */
24624 if (dynamic_check != -1)
24626 if (!issetmem && CONST_INT_P (count_exp))
24628 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
24630 emit_block_move_via_libcall (dst, src, count_exp, false);
24631 count_exp = const0_rtx;
24632 goto epilogue;
24635 else
24637 rtx hot_label = gen_label_rtx ();
24638 if (jump_around_label == NULL_RTX)
24639 jump_around_label = gen_label_rtx ();
24640 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
24641 LEU, 0, GET_MODE (count_exp), 1, hot_label);
24642 predict_jump (REG_BR_PROB_BASE * 90 / 100);
24643 if (issetmem)
24644 set_storage_via_libcall (dst, count_exp, val_exp, false);
24645 else
24646 emit_block_move_via_libcall (dst, src, count_exp, false);
24647 emit_jump (jump_around_label);
24648 emit_label (hot_label);
24652 /* Step 2: Alignment prologue. */
24653 /* Do the expensive promotion once we branched off the small blocks. */
24654 if (issetmem && !promoted_val)
24655 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24656 desired_align, align);
24658 if (desired_align > align && !misaligned_prologue_used)
24660 if (align_bytes == 0)
24662 /* Except for the first move in the prologue, we no longer know
24663 the constant offset in the aliasing info. It does not seem worth
24664 the pain to maintain it for the first move, so throw away
24665 the info early. */
24666 dst = change_address (dst, BLKmode, destreg);
24667 if (!issetmem)
24668 src = change_address (src, BLKmode, srcreg);
24669 dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg,
24670 promoted_val, vec_promoted_val,
24671 count_exp, align, desired_align,
24672 issetmem);
24673 /* At most desired_align - align bytes are copied. */
24674 if (min_size < (unsigned)(desired_align - align))
24675 min_size = 0;
24676 else
24677 min_size -= desired_align - align;
24679 else
24681 /* If we know how many bytes need to be stored before dst is
24682 sufficiently aligned, maintain aliasing info accurately. */
24683 dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg,
24684 srcreg,
24685 promoted_val,
24686 vec_promoted_val,
24687 desired_align,
24688 align_bytes,
24689 issetmem);
24691 count_exp = plus_constant (counter_mode (count_exp),
24692 count_exp, -align_bytes);
24693 count -= align_bytes;
24694 min_size -= align_bytes;
24695 max_size -= align_bytes;
24697 if (need_zero_guard
24698 && !min_size
24699 && (count < (unsigned HOST_WIDE_INT) size_needed
24700 || (align_bytes == 0
24701 && count < ((unsigned HOST_WIDE_INT) size_needed
24702 + desired_align - align))))
24704 /* It is possible that we copied enough so the main loop will not
24705 execute. */
24706 gcc_assert (size_needed > 1);
24707 if (label == NULL_RTX)
24708 label = gen_label_rtx ();
24709 emit_cmp_and_jump_insns (count_exp,
24710 GEN_INT (size_needed),
24711 LTU, 0, counter_mode (count_exp), 1, label);
24712 if (expected_size == -1
24713 || expected_size < (desired_align - align) / 2 + size_needed)
24714 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24715 else
24716 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24719 if (label && size_needed == 1)
24721 emit_label (label);
24722 LABEL_NUSES (label) = 1;
24723 label = NULL;
24724 epilogue_size_needed = 1;
24725 if (issetmem)
24726 promoted_val = val_exp;
24728 else if (label == NULL_RTX && !misaligned_prologue_used)
24729 epilogue_size_needed = size_needed;
24731 /* Step 3: Main loop. */
24733 switch (alg)
24735 case libcall:
24736 case no_stringop:
24737 case last_alg:
24738 gcc_unreachable ();
24739 case loop_1_byte:
24740 case loop:
24741 case unrolled_loop:
24742 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val,
24743 count_exp, move_mode, unroll_factor,
24744 expected_size, issetmem);
24745 break;
24746 case vector_loop:
24747 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg,
24748 vec_promoted_val, count_exp, move_mode,
24749 unroll_factor, expected_size, issetmem);
24750 break;
24751 case rep_prefix_8_byte:
24752 case rep_prefix_4_byte:
24753 case rep_prefix_1_byte:
24754 expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val,
24755 val_exp, count_exp, move_mode, issetmem);
24756 break;
24758 /* Properly adjust the offset of the src and dest memory for aliasing. */
24759 if (CONST_INT_P (count_exp))
24761 if (!issetmem)
24762 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
24763 (count / size_needed) * size_needed);
24764 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
24765 (count / size_needed) * size_needed);
24767 else
24769 if (!issetmem)
24770 src = change_address (src, BLKmode, srcreg);
24771 dst = change_address (dst, BLKmode, destreg);
24774 /* Step 4: Epilogue to copy the remaining bytes. */
24775 epilogue:
24776 if (label)
24778 /* When the main loop is done, COUNT_EXP might hold original count,
24779 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
24780 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
24781 bytes. Compensate if needed. */
24783 if (size_needed < epilogue_size_needed)
24785 tmp =
24786 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
24787 GEN_INT (size_needed - 1), count_exp, 1,
24788 OPTAB_DIRECT);
24789 if (tmp != count_exp)
24790 emit_move_insn (count_exp, tmp);
24792 emit_label (label);
24793 LABEL_NUSES (label) = 1;
24796 if (count_exp != const0_rtx && epilogue_size_needed > 1)
24798 if (force_loopy_epilogue)
24799 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
24800 epilogue_size_needed);
24801 else
24803 if (issetmem)
24804 expand_setmem_epilogue (dst, destreg, promoted_val,
24805 vec_promoted_val, count_exp,
24806 epilogue_size_needed);
24807 else
24808 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
24809 epilogue_size_needed);
24812 if (jump_around_label)
24813 emit_label (jump_around_label);
24814 return true;
24818 /* Expand the appropriate insns for doing strlen if not just doing
24819 repnz; scasb
24821 out = result, initialized with the start address
24822 align_rtx = alignment of the address.
24823 scratch = scratch register, initialized with the start address when
24824 not aligned, otherwise undefined
24826 This is just the body. It needs the initializations mentioned above and
24827 some address computing at the end. These things are done in i386.md. */
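/* Roughly, the expansion below is: a byte-at-a-time check of up to three
   bytes until OUT is 4-byte aligned, then a loop scanning four bytes per
   iteration with the zero-byte test described further down, and finally
   branchless code that steps OUT back to the exact position of the
   terminating zero byte.  */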
24829 static void
24830 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
24832 int align;
24833 rtx tmp;
24834 rtx align_2_label = NULL_RTX;
24835 rtx align_3_label = NULL_RTX;
24836 rtx align_4_label = gen_label_rtx ();
24837 rtx end_0_label = gen_label_rtx ();
24838 rtx mem;
24839 rtx tmpreg = gen_reg_rtx (SImode);
24840 rtx scratch = gen_reg_rtx (SImode);
24841 rtx cmp;
24843 align = 0;
24844 if (CONST_INT_P (align_rtx))
24845 align = INTVAL (align_rtx);
24847 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
24849 /* Is there a known alignment and is it less than 4? */
24850 if (align < 4)
24852 rtx scratch1 = gen_reg_rtx (Pmode);
24853 emit_move_insn (scratch1, out);
24854 /* Is there a known alignment and is it not 2? */
24855 if (align != 2)
24857 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
24858 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
24860 /* Leave just the 3 lower bits. */
24861 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
24862 NULL_RTX, 0, OPTAB_WIDEN);
24864 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
24865 Pmode, 1, align_4_label);
24866 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
24867 Pmode, 1, align_2_label);
24868 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
24869 Pmode, 1, align_3_label);
24871 else
24873 /* Since the alignment is 2, we have to check 2 or 0 bytes;
24874 check if it is aligned to 4 bytes. */
24876 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
24877 NULL_RTX, 0, OPTAB_WIDEN);
24879 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
24880 Pmode, 1, align_4_label);
24883 mem = change_address (src, QImode, out);
24885 /* Now compare the bytes. */
24887 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
24888 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
24889 QImode, 1, end_0_label);
24891 /* Increment the address. */
24892 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24894 /* Not needed with an alignment of 2 */
24895 if (align != 2)
24897 emit_label (align_2_label);
24899 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
24900 end_0_label);
24902 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24904 emit_label (align_3_label);
24907 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
24908 end_0_label);
24910 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24913 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
24914 align this loop: it only makes the program larger and does not
24915 speed it up. */
24916 emit_label (align_4_label);
24918 mem = change_address (src, SImode, out);
24919 emit_move_insn (scratch, mem);
24920 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
24922 /* This formula yields a nonzero result iff one of the bytes is zero.
24923 This saves three branches inside the loop and many cycles. */
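/* For illustration, with SCRATCH == 0x64636200 (low byte zero):
   0x64636200 - 0x01010101 == 0x636260ff and ~0x64636200 == 0x9b9c9dff;
   their AND is 0x030000ff, and masking with 0x80808080 leaves 0x00000080,
   the high bit of the byte that was zero.  A word with no zero byte
   yields 0 and the loop continues.  */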
24925 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
24926 emit_insn (gen_one_cmplsi2 (scratch, scratch));
24927 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
24928 emit_insn (gen_andsi3 (tmpreg, tmpreg,
24929 gen_int_mode (0x80808080, SImode)));
24930 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
24931 align_4_label);
24933 if (TARGET_CMOVE)
24935 rtx reg = gen_reg_rtx (SImode);
24936 rtx reg2 = gen_reg_rtx (Pmode);
24937 emit_move_insn (reg, tmpreg);
24938 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
24940 /* If zero is not in the first two bytes, move two bytes forward. */
24941 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
24942 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24943 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
24944 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
24945 gen_rtx_IF_THEN_ELSE (SImode, tmp,
24946 reg,
24947 tmpreg)));
24948 /* Emit lea manually to avoid clobbering of flags. */
24949 emit_insn (gen_rtx_SET (SImode, reg2,
24950 gen_rtx_PLUS (Pmode, out, const2_rtx)));
24952 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24953 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
24954 emit_insn (gen_rtx_SET (VOIDmode, out,
24955 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
24956 reg2,
24957 out)));
24959 else
24961 rtx end_2_label = gen_label_rtx ();
24962 /* Is zero in the first two bytes? */
24964 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
24965 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24966 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
24967 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
24968 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
24969 pc_rtx);
24970 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
24971 JUMP_LABEL (tmp) = end_2_label;
24973 /* Not in the first two. Move two bytes forward. */
24974 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
24975 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
24977 emit_label (end_2_label);
24981 /* Avoid branch in fixing the byte. */
24982 tmpreg = gen_lowpart (QImode, tmpreg);
24983 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
24984 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
24985 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
24986 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
24988 emit_label (end_0_label);
24991 /* Expand strlen. */
24993 bool
24994 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
24996 rtx addr, scratch1, scratch2, scratch3, scratch4;
24998 /* The generic case of the strlen expander is long. Avoid expanding it
24999 unless TARGET_INLINE_ALL_STRINGOPS. */
25001 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
25002 && !TARGET_INLINE_ALL_STRINGOPS
25003 && !optimize_insn_for_size_p ()
25004 && (!CONST_INT_P (align) || INTVAL (align) < 4))
25005 return false;
25007 addr = force_reg (Pmode, XEXP (src, 0));
25008 scratch1 = gen_reg_rtx (Pmode);
25010 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
25011 && !optimize_insn_for_size_p ())
25013 /* Well, it seems that some optimizer does not combine a call like
25014 foo(strlen(bar), strlen(bar));
25015 when the move and the subtraction are done here. It does calculate
25016 the length just once when these instructions are done inside of
25017 output_strlen_unroll(). But I think that since &bar[strlen(bar)] is
25018 often used and I use one fewer register for the lifetime of
25019 output_strlen_unroll(), this is better. */
25021 emit_move_insn (out, addr);
25023 ix86_expand_strlensi_unroll_1 (out, src, align);
25025 /* strlensi_unroll_1 returns the address of the zero at the end of
25026 the string, like memchr(), so compute the length by subtracting
25027 the start address. */
25028 emit_insn (ix86_gen_sub3 (out, out, addr));
25030 else
25032 rtx unspec;
25034 /* Can't use this if the user has appropriated eax, ecx, or edi. */
25035 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
25036 return false;
25038 scratch2 = gen_reg_rtx (Pmode);
25039 scratch3 = gen_reg_rtx (Pmode);
25040 scratch4 = force_reg (Pmode, constm1_rtx);
25042 emit_move_insn (scratch3, addr);
25043 eoschar = force_reg (QImode, eoschar);
25045 src = replace_equiv_address_nv (src, scratch3);
25047 /* If .md starts supporting :P, this can be done in .md. */
25048 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
25049 scratch4), UNSPEC_SCAS);
25050 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
25051 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
25052 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
25054 return true;
25057 /* For a given symbol (function), construct code to compute the address of its
25058 PLT entry in the large x86-64 PIC model. */
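/* Roughly, the emitted sequence amounts to loading symbol@PLTOFF into a
   temporary (a movabs in the large model) and adding the register that
   holds the GOT base (pic_offset_table_rtx); the caller then uses the
   result as an indirect call address.  The exact assembly spelling here
   is only illustrative.  */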
25059 static rtx
25060 construct_plt_address (rtx symbol)
25062 rtx tmp, unspec;
25064 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
25065 gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
25066 gcc_assert (Pmode == DImode);
25068 tmp = gen_reg_rtx (Pmode);
25069 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
25071 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
25072 emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
25073 return tmp;
25077 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
25078 rtx callarg2,
25079 rtx pop, bool sibcall)
25081 unsigned int const cregs_size
25082 = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers);
25083 rtx vec[3 + cregs_size];
25084 rtx use = NULL, call;
25085 unsigned int vec_len = 0;
25087 if (pop == const0_rtx)
25088 pop = NULL;
25089 gcc_assert (!TARGET_64BIT || !pop);
25091 if (TARGET_MACHO && !TARGET_64BIT)
25093 #if TARGET_MACHO
25094 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
25095 fnaddr = machopic_indirect_call_target (fnaddr);
25096 #endif
25098 else
25100 /* Static functions and indirect calls don't need the pic register. */
25101 if (flag_pic
25102 && (!TARGET_64BIT
25103 || (ix86_cmodel == CM_LARGE_PIC
25104 && DEFAULT_ABI != MS_ABI))
25105 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
25106 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
25107 use_reg (&use, pic_offset_table_rtx);
25110 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
25112 rtx al = gen_rtx_REG (QImode, AX_REG);
25113 emit_move_insn (al, callarg2);
25114 use_reg (&use, al);
25117 if (ix86_cmodel == CM_LARGE_PIC
25118 && !TARGET_PECOFF
25119 && MEM_P (fnaddr)
25120 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
25121 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
25122 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
25123 else if (sibcall
25124 ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
25125 : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
25127 fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
25128 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
25131 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
25133 if (retval)
25135 /* For instrumented code we may have a GPR + BR in parallel, but
25136 that would confuse DF, so we need to put each reg
25137 under an EXPR_LIST. */
25138 if (chkp_function_instrumented_p (current_function_decl))
25139 chkp_put_regs_to_expr_list (retval);
25141 call = gen_rtx_SET (VOIDmode, retval, call);
25143 vec[vec_len++] = call;
25145 /* b0 and b1 registers hold bounds for returned value. */
25146 if (retval)
25148 rtx b0 = gen_rtx_REG (BND64mode, FIRST_BND_REG);
25149 rtx unspec0 = gen_rtx_UNSPEC (BND64mode,
25150 gen_rtvec (1, b0), UNSPEC_BNDRET);
25151 rtx b1 = gen_rtx_REG (BND64mode, FIRST_BND_REG + 1);
25152 rtx unspec1 = gen_rtx_UNSPEC (BND64mode,
25153 gen_rtvec (1, b1), UNSPEC_BNDRET);
25154 vec[vec_len++] = gen_rtx_SET (BND64mode, b0, unspec0);
25155 vec[vec_len++] = gen_rtx_SET (BND64mode, b1, unspec1);
25158 if (pop)
25160 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
25161 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
25162 vec[vec_len++] = pop;
25165 if (TARGET_64BIT_MS_ABI
25166 && (!callarg2 || INTVAL (callarg2) != -2))
25168 unsigned i;
25170 vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
25171 UNSPEC_MS_TO_SYSV_CALL);
25173 for (i = 0; i < cregs_size; i++)
25175 int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
25176 enum machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;
25178 vec[vec_len++]
25179 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (mode, regno));
25183 if (vec_len > 1)
25184 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
25185 call = emit_call_insn (call);
25186 if (use)
25187 CALL_INSN_FUNCTION_USAGE (call) = use;
25189 return call;
25192 /* Output the assembly for a call instruction. */
25194 const char *
25195 ix86_output_call_insn (rtx insn, rtx call_op)
25197 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
25198 bool seh_nop_p = false;
25199 const char *xasm;
25201 if (SIBLING_CALL_P (insn))
25203 if (direct_p)
25204 xasm = "%!jmp\t%P0";
25205 /* SEH epilogue detection requires the indirect branch case
25206 to include REX.W. */
25207 else if (TARGET_SEH)
25208 xasm = "%!rex.W jmp %A0";
25209 else
25210 xasm = "%!jmp\t%A0";
25212 output_asm_insn (xasm, &call_op);
25213 return "";
25216 /* SEH unwinding can require an extra nop to be emitted in several
25217 circumstances. Determine if we have one of those. */
25218 if (TARGET_SEH)
25220 rtx i;
25222 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
25224 /* If we get to another real insn, we don't need the nop. */
25225 if (INSN_P (i))
25226 break;
25228 /* If we get to the epilogue note, prevent a catch region from
25229 being adjacent to the standard epilogue sequence. If non-
25230 call-exceptions, we'll have done this during epilogue emission. */
25231 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
25232 && !flag_non_call_exceptions
25233 && !can_throw_internal (insn))
25235 seh_nop_p = true;
25236 break;
25240 /* If we didn't find a real insn following the call, prevent the
25241 unwinder from looking into the next function. */
25242 if (i == NULL)
25243 seh_nop_p = true;
25246 if (direct_p)
25247 xasm = "%!call\t%P0";
25248 else
25249 xasm = "%!call\t%A0";
25251 output_asm_insn (xasm, &call_op);
25253 if (seh_nop_p)
25254 return "nop";
25256 return "";
25259 /* Clear stack slot assignments remembered from previous functions.
25260 This is called from INIT_EXPANDERS once before RTL is emitted for each
25261 function. */
25263 static struct machine_function *
25264 ix86_init_machine_status (void)
25266 struct machine_function *f;
25268 f = ggc_alloc_cleared_machine_function ();
25269 f->use_fast_prologue_epilogue_nregs = -1;
25270 f->call_abi = ix86_abi;
25272 return f;
25275 /* Return a MEM corresponding to a stack slot with mode MODE.
25276 Allocate a new slot if necessary.
25278 The RTL for a function can have several slots available: N is
25279 which slot to use. */
25282 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
25284 struct stack_local_entry *s;
25286 gcc_assert (n < MAX_386_STACK_LOCALS);
25288 for (s = ix86_stack_locals; s; s = s->next)
25289 if (s->mode == mode && s->n == n)
25290 return validize_mem (copy_rtx (s->rtl));
25292 s = ggc_alloc_stack_local_entry ();
25293 s->n = n;
25294 s->mode = mode;
25295 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
25297 s->next = ix86_stack_locals;
25298 ix86_stack_locals = s;
25299 return validize_mem (s->rtl);
25302 static void
25303 ix86_instantiate_decls (void)
25305 struct stack_local_entry *s;
25307 for (s = ix86_stack_locals; s; s = s->next)
25308 if (s->rtl != NULL_RTX)
25309 instantiate_decl_rtl (s->rtl);
25312 /* Check whether x86 address PARTS is a pc-relative address. */
25314 static bool
25315 rip_relative_addr_p (struct ix86_address *parts)
25317 rtx base, index, disp;
25319 base = parts->base;
25320 index = parts->index;
25321 disp = parts->disp;
25323 if (disp && !base && !index)
25325 if (TARGET_64BIT)
25327 rtx symbol = disp;
25329 if (GET_CODE (disp) == CONST)
25330 symbol = XEXP (disp, 0);
25331 if (GET_CODE (symbol) == PLUS
25332 && CONST_INT_P (XEXP (symbol, 1)))
25333 symbol = XEXP (symbol, 0);
25335 if (GET_CODE (symbol) == LABEL_REF
25336 || (GET_CODE (symbol) == SYMBOL_REF
25337 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
25338 || (GET_CODE (symbol) == UNSPEC
25339 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
25340 || XINT (symbol, 1) == UNSPEC_PCREL
25341 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
25342 return true;
25345 return false;
25348 /* Calculate the length of the memory address in the instruction encoding.
25349 Includes the addr32 prefix, does not include the one-byte modrm, opcode,
25350 or other prefixes. We never generate an addr32 prefix for an LEA insn. */
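/* For example, a memory operand such as 16(%rax,%rbx,4) yields 2 here:
   1 byte for the disp8 (it satisfies constraint K) plus 1 byte for the
   SIB forced by the index; the modrm byte, opcode and other prefixes are
   excluded as noted above.  */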
25353 memory_address_length (rtx addr, bool lea)
25355 struct ix86_address parts;
25356 rtx base, index, disp;
25357 int len;
25358 int ok;
25360 if (GET_CODE (addr) == PRE_DEC
25361 || GET_CODE (addr) == POST_INC
25362 || GET_CODE (addr) == PRE_MODIFY
25363 || GET_CODE (addr) == POST_MODIFY)
25364 return 0;
25366 ok = ix86_decompose_address (addr, &parts);
25367 gcc_assert (ok);
25369 len = (parts.seg == SEG_DEFAULT) ? 0 : 1;
25371 /* If this is not LEA instruction, add the length of addr32 prefix. */
25372 if (TARGET_64BIT && !lea
25373 && (SImode_address_operand (addr, VOIDmode)
25374 || (parts.base && GET_MODE (parts.base) == SImode)
25375 || (parts.index && GET_MODE (parts.index) == SImode)))
25376 len++;
25378 base = parts.base;
25379 index = parts.index;
25380 disp = parts.disp;
25382 if (base && GET_CODE (base) == SUBREG)
25383 base = SUBREG_REG (base);
25384 if (index && GET_CODE (index) == SUBREG)
25385 index = SUBREG_REG (index);
25387 gcc_assert (base == NULL_RTX || REG_P (base));
25388 gcc_assert (index == NULL_RTX || REG_P (index));
25390 /* Rule of thumb:
25391 - esp as the base always wants an index,
25392 - ebp as the base always wants a displacement,
25393 - r12 as the base always wants an index,
25394 - r13 as the base always wants a displacement. */
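/* E.g. movl (%eax), %ecx needs only the modrm byte, movl (%esp), %ecx
   additionally needs a SIB byte, and movl (%ebp), %ecx must be assembled
   as 0(%ebp) with a zero disp8; the len++ below accounts for that extra
   byte in each of these cases.  */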
25396 /* Register Indirect. */
25397 if (base && !index && !disp)
25399 /* esp (for its index) and ebp (for its displacement) need
25400 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
25401 code. */
25402 if (base == arg_pointer_rtx
25403 || base == frame_pointer_rtx
25404 || REGNO (base) == SP_REG
25405 || REGNO (base) == BP_REG
25406 || REGNO (base) == R12_REG
25407 || REGNO (base) == R13_REG)
25408 len++;
25411 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
25412 is not disp32, but disp32(%rip), so for disp32
25413 SIB byte is needed, unless print_operand_address
25414 optimizes it into disp32(%rip) or (%rip) is implied
25415 by UNSPEC. */
25416 else if (disp && !base && !index)
25418 len += 4;
25419 if (rip_relative_addr_p (&parts))
25420 len++;
25422 else
25424 /* Find the length of the displacement constant. */
25425 if (disp)
25427 if (base && satisfies_constraint_K (disp))
25428 len += 1;
25429 else
25430 len += 4;
25432 /* ebp always wants a displacement. Similarly r13. */
25433 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
25434 len++;
25436 /* An index requires the two-byte modrm form.... */
25437 if (index
25438 /* ...like esp (or r12), which always wants an index. */
25439 || base == arg_pointer_rtx
25440 || base == frame_pointer_rtx
25441 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
25442 len++;
25445 return len;
25448 /* Compute the default value for the "length_immediate" attribute. When
25449 SHORTFORM is set, expect that the insn has an 8-bit immediate alternative. */
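/* E.g. with SHORTFORM, addl $3, %edx can use the sign-extended 8-bit
   immediate form, so the immediate counts as 1 byte, while addl $300, %edx
   needs a full 32-bit immediate and counts as 4.  */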
25451 ix86_attr_length_immediate_default (rtx insn, bool shortform)
25453 int len = 0;
25454 int i;
25455 extract_insn_cached (insn);
25456 for (i = recog_data.n_operands - 1; i >= 0; --i)
25457 if (CONSTANT_P (recog_data.operand[i]))
25459 enum attr_mode mode = get_attr_mode (insn);
25461 gcc_assert (!len);
25462 if (shortform && CONST_INT_P (recog_data.operand[i]))
25464 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
25465 switch (mode)
25467 case MODE_QI:
25468 len = 1;
25469 continue;
25470 case MODE_HI:
25471 ival = trunc_int_for_mode (ival, HImode);
25472 break;
25473 case MODE_SI:
25474 ival = trunc_int_for_mode (ival, SImode);
25475 break;
25476 default:
25477 break;
25479 if (IN_RANGE (ival, -128, 127))
25481 len = 1;
25482 continue;
25485 switch (mode)
25487 case MODE_QI:
25488 len = 1;
25489 break;
25490 case MODE_HI:
25491 len = 2;
25492 break;
25493 case MODE_SI:
25494 len = 4;
25495 break;
25496 /* Immediates for DImode instructions are encoded
25497 as 32-bit sign-extended values. */
25498 case MODE_DI:
25499 len = 4;
25500 break;
25501 default:
25502 fatal_insn ("unknown insn mode", insn);
25505 return len;
25508 /* Compute default value for "length_address" attribute. */
25510 ix86_attr_length_address_default (rtx insn)
25512 int i;
25514 if (get_attr_type (insn) == TYPE_LEA)
25516 rtx set = PATTERN (insn), addr;
25518 if (GET_CODE (set) == PARALLEL)
25519 set = XVECEXP (set, 0, 0);
25521 gcc_assert (GET_CODE (set) == SET);
25523 addr = SET_SRC (set);
25525 return memory_address_length (addr, true);
25528 extract_insn_cached (insn);
25529 for (i = recog_data.n_operands - 1; i >= 0; --i)
25530 if (MEM_P (recog_data.operand[i]))
25532 constrain_operands_cached (reload_completed);
25533 if (which_alternative != -1)
25535 const char *constraints = recog_data.constraints[i];
25536 int alt = which_alternative;
25538 while (*constraints == '=' || *constraints == '+')
25539 constraints++;
25540 while (alt-- > 0)
25541 while (*constraints++ != ',')
25543 /* Skip ignored operands. */
25544 if (*constraints == 'X')
25545 continue;
25547 return memory_address_length (XEXP (recog_data.operand[i], 0), false);
25549 return 0;
25552 /* Compute default value for "length_vex" attribute. It includes
25553 2 or 3 byte VEX prefix and 1 opcode byte. */
25556 ix86_attr_length_vex_default (rtx insn, bool has_0f_opcode, bool has_vex_w)
25558 int i;
25560 /* Only the 0f opcode can use the 2-byte VEX prefix; the VEX W bit requires
25561 the 3-byte VEX prefix. */
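/* The 2-byte (C5) form can express only the implied 0f opcode map and
   cannot encode VEX.W, VEX.X or VEX.B, so anything needing those bits or
   the 0f38/0f3a maps must use the 3-byte (C4) form; the checks below
   approximate that rule.  */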
25562 if (!has_0f_opcode || has_vex_w)
25563 return 3 + 1;
25565 /* We can always use 2 byte VEX prefix in 32bit. */
25566 if (!TARGET_64BIT)
25567 return 2 + 1;
25569 extract_insn_cached (insn);
25571 for (i = recog_data.n_operands - 1; i >= 0; --i)
25572 if (REG_P (recog_data.operand[i]))
25574 /* REX.W bit uses 3 byte VEX prefix. */
25575 if (GET_MODE (recog_data.operand[i]) == DImode
25576 && GENERAL_REG_P (recog_data.operand[i]))
25577 return 3 + 1;
25579 else
25581 /* REX.X or REX.B bits use 3 byte VEX prefix. */
25582 if (MEM_P (recog_data.operand[i])
25583 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
25584 return 3 + 1;
25587 return 2 + 1;
25590 /* Return the maximum number of instructions a cpu can issue. */
25592 static int
25593 ix86_issue_rate (void)
25595 switch (ix86_tune)
25597 case PROCESSOR_PENTIUM:
25598 case PROCESSOR_BONNELL:
25599 case PROCESSOR_SILVERMONT:
25600 case PROCESSOR_INTEL:
25601 case PROCESSOR_K6:
25602 case PROCESSOR_BTVER2:
25603 case PROCESSOR_PENTIUM4:
25604 case PROCESSOR_NOCONA:
25605 return 2;
25607 case PROCESSOR_PENTIUMPRO:
25608 case PROCESSOR_ATHLON:
25609 case PROCESSOR_K8:
25610 case PROCESSOR_AMDFAM10:
25611 case PROCESSOR_GENERIC:
25612 case PROCESSOR_BTVER1:
25613 return 3;
25615 case PROCESSOR_BDVER1:
25616 case PROCESSOR_BDVER2:
25617 case PROCESSOR_BDVER3:
25618 case PROCESSOR_BDVER4:
25619 case PROCESSOR_CORE2:
25620 case PROCESSOR_NEHALEM:
25621 case PROCESSOR_SANDYBRIDGE:
25622 case PROCESSOR_HASWELL:
25623 return 4;
25625 default:
25626 return 1;
25630 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
25631 by DEP_INSN and nothing else set by DEP_INSN. */
25633 static bool
25634 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
25636 rtx set, set2;
25638 /* Simplify the test for uninteresting insns. */
25639 if (insn_type != TYPE_SETCC
25640 && insn_type != TYPE_ICMOV
25641 && insn_type != TYPE_FCMOV
25642 && insn_type != TYPE_IBR)
25643 return false;
25645 if ((set = single_set (dep_insn)) != 0)
25647 set = SET_DEST (set);
25648 set2 = NULL_RTX;
25650 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
25651 && XVECLEN (PATTERN (dep_insn), 0) == 2
25652 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
25653 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
25655 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
25656 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
25658 else
25659 return false;
25661 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
25662 return false;
25664 /* This test is true if the dependent insn reads the flags but
25665 not any other potentially set register. */
25666 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
25667 return false;
25669 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
25670 return false;
25672 return true;
25675 /* Return true iff USE_INSN has a memory address with operands set by
25676 SET_INSN. */
25678 bool
25679 ix86_agi_dependent (rtx set_insn, rtx use_insn)
25681 int i;
25682 extract_insn_cached (use_insn);
25683 for (i = recog_data.n_operands - 1; i >= 0; --i)
25684 if (MEM_P (recog_data.operand[i]))
25686 rtx addr = XEXP (recog_data.operand[i], 0);
25687 return modified_in_p (addr, set_insn) != 0;
25689 return false;
25692 /* Helper function for exact_store_load_dependency.
25693 Return true if addr is found in insn. */
25694 static bool
25695 exact_dependency_1 (rtx addr, rtx insn)
25697 enum rtx_code code;
25698 const char *format_ptr;
25699 int i, j;
25701 code = GET_CODE (insn);
25702 switch (code)
25704 case MEM:
25705 if (rtx_equal_p (addr, insn))
25706 return true;
25707 break;
25708 case REG:
25709 CASE_CONST_ANY:
25710 case SYMBOL_REF:
25711 case CODE_LABEL:
25712 case PC:
25713 case CC0:
25714 case EXPR_LIST:
25715 return false;
25716 default:
25717 break;
25720 format_ptr = GET_RTX_FORMAT (code);
25721 for (i = 0; i < GET_RTX_LENGTH (code); i++)
25723 switch (*format_ptr++)
25725 case 'e':
25726 if (exact_dependency_1 (addr, XEXP (insn, i)))
25727 return true;
25728 break;
25729 case 'E':
25730 for (j = 0; j < XVECLEN (insn, i); j++)
25731 if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
25732 return true;
25733 break;
25736 return false;
25739 /* Return true if there is an exact dependency between the store and the load,
25740 i.e. the same memory address is used in both. */
25741 static bool
25742 exact_store_load_dependency (rtx store, rtx load)
25744 rtx set1, set2;
25746 set1 = single_set (store);
25747 if (!set1)
25748 return false;
25749 if (!MEM_P (SET_DEST (set1)))
25750 return false;
25751 set2 = single_set (load);
25752 if (!set2)
25753 return false;
25754 if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
25755 return true;
25756 return false;
25759 static int
25760 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
25762 enum attr_type insn_type, dep_insn_type;
25763 enum attr_memory memory;
25764 rtx set, set2;
25765 int dep_insn_code_number;
25767 /* Anti and output dependencies have zero cost on all CPUs. */
25768 if (REG_NOTE_KIND (link) != 0)
25769 return 0;
25771 dep_insn_code_number = recog_memoized (dep_insn);
25773 /* If we can't recognize the insns, we can't really do anything. */
25774 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
25775 return cost;
25777 insn_type = get_attr_type (insn);
25778 dep_insn_type = get_attr_type (dep_insn);
25780 switch (ix86_tune)
25782 case PROCESSOR_PENTIUM:
25783 /* Address Generation Interlock adds a cycle of latency. */
25784 if (insn_type == TYPE_LEA)
25786 rtx addr = PATTERN (insn);
25788 if (GET_CODE (addr) == PARALLEL)
25789 addr = XVECEXP (addr, 0, 0);
25791 gcc_assert (GET_CODE (addr) == SET);
25793 addr = SET_SRC (addr);
25794 if (modified_in_p (addr, dep_insn))
25795 cost += 1;
25797 else if (ix86_agi_dependent (dep_insn, insn))
25798 cost += 1;
25800 /* ??? Compares pair with jump/setcc. */
25801 if (ix86_flags_dependent (insn, dep_insn, insn_type))
25802 cost = 0;
25804 /* Floating point stores require value to be ready one cycle earlier. */
25805 if (insn_type == TYPE_FMOV
25806 && get_attr_memory (insn) == MEMORY_STORE
25807 && !ix86_agi_dependent (dep_insn, insn))
25808 cost += 1;
25809 break;
25811 case PROCESSOR_PENTIUMPRO:
25812 /* INT->FP conversion is expensive. */
25813 if (get_attr_fp_int_src (dep_insn))
25814 cost += 5;
25816 /* There is one cycle extra latency between an FP op and a store. */
25817 if (insn_type == TYPE_FMOV
25818 && (set = single_set (dep_insn)) != NULL_RTX
25819 && (set2 = single_set (insn)) != NULL_RTX
25820 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
25821 && MEM_P (SET_DEST (set2)))
25822 cost += 1;
25824 memory = get_attr_memory (insn);
25826 /* Model the ability of the reorder buffer to hide the latency of a load by
25827 executing it in parallel with the previous instruction when the
25828 previous instruction is not needed to compute the address. */
25829 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25830 && !ix86_agi_dependent (dep_insn, insn))
25832 /* Claim moves take one cycle, as the core can issue one load
25833 at a time and the next load can start a cycle later. */
25834 if (dep_insn_type == TYPE_IMOV
25835 || dep_insn_type == TYPE_FMOV)
25836 cost = 1;
25837 else if (cost > 1)
25838 cost--;
25840 break;
25842 case PROCESSOR_K6:
25843 /* The esp dependency is resolved before
25844 the instruction is really finished. */
25845 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25846 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25847 return 1;
25849 /* INT->FP conversion is expensive. */
25850 if (get_attr_fp_int_src (dep_insn))
25851 cost += 5;
25853 memory = get_attr_memory (insn);
25855 /* Model the ability of the reorder buffer to hide the latency of a load by
25856 executing it in parallel with the previous instruction when the
25857 previous instruction is not needed to compute the address. */
25858 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25859 && !ix86_agi_dependent (dep_insn, insn))
25861 /* Claim moves take one cycle, as the core can issue one load
25862 at a time and the next load can start a cycle later. */
25863 if (dep_insn_type == TYPE_IMOV
25864 || dep_insn_type == TYPE_FMOV)
25865 cost = 1;
25866 else if (cost > 2)
25867 cost -= 2;
25868 else
25869 cost = 1;
25871 break;
25873 case PROCESSOR_AMDFAM10:
25874 case PROCESSOR_BDVER1:
25875 case PROCESSOR_BDVER2:
25876 case PROCESSOR_BDVER3:
25877 case PROCESSOR_BDVER4:
25878 case PROCESSOR_BTVER1:
25879 case PROCESSOR_BTVER2:
25880 case PROCESSOR_GENERIC:
25881 /* The stack engine allows push and pop instructions to execute in parallel. */
25882 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25883 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25884 return 0;
25885 /* FALLTHRU */
25887 case PROCESSOR_ATHLON:
25888 case PROCESSOR_K8:
25889 memory = get_attr_memory (insn);
25891 /* Model the ability of the reorder buffer to hide the latency of a load by
25892 executing it in parallel with the previous instruction when the
25893 previous instruction is not needed to compute the address. */
25894 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25895 && !ix86_agi_dependent (dep_insn, insn))
25897 enum attr_unit unit = get_attr_unit (insn);
25898 int loadcost = 3;
25900 /* Because of the difference between the length of integer and
25901 floating unit pipeline preparation stages, the memory operands
25902 for floating point are cheaper.
25904 ??? For Athlon the difference is most probably 2. */
25905 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
25906 loadcost = 3;
25907 else
25908 loadcost = TARGET_ATHLON ? 2 : 0;
25910 if (cost >= loadcost)
25911 cost -= loadcost;
25912 else
25913 cost = 0;
25915 break;
25917 case PROCESSOR_CORE2:
25918 case PROCESSOR_NEHALEM:
25919 case PROCESSOR_SANDYBRIDGE:
25920 case PROCESSOR_HASWELL:
25921 /* The stack engine allows push and pop instructions to execute in parallel. */
25922 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25923 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25924 return 0;
25926 memory = get_attr_memory (insn);
25928 /* Model the ability of the reorder buffer to hide the latency of a load by
25929 executing it in parallel with the previous instruction when the
25930 previous instruction is not needed to compute the address. */
25931 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25932 && !ix86_agi_dependent (dep_insn, insn))
25934 if (cost >= 4)
25935 cost -= 4;
25936 else
25937 cost = 0;
25939 break;
25941 case PROCESSOR_SILVERMONT:
25942 case PROCESSOR_INTEL:
25943 if (!reload_completed)
25944 return cost;
25946 /* Increase cost of integer loads. */
25947 memory = get_attr_memory (dep_insn);
25948 if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25950 enum attr_unit unit = get_attr_unit (dep_insn);
25951 if (unit == UNIT_INTEGER && cost == 1)
25953 if (memory == MEMORY_LOAD)
25954 cost = 3;
25955 else
25957 /* Increase cost of ld/st for short int types only
25958 because of store forwarding issue. */
25959 rtx set = single_set (dep_insn);
25960 if (set && (GET_MODE (SET_DEST (set)) == QImode
25961 || GET_MODE (SET_DEST (set)) == HImode))
25963 /* Increase cost of store/load insn if exact
25964 dependence exists and it is load insn. */
25965 enum attr_memory insn_memory = get_attr_memory (insn);
25966 if (insn_memory == MEMORY_LOAD
25967 && exact_store_load_dependency (dep_insn, insn))
25968 cost = 3;
25974 default:
25975 break;
25978 return cost;
25981 /* How many alternative schedules to try. This should be as wide as the
25982 scheduling freedom in the DFA, but no wider. Making this value too
25983 large results in extra work for the scheduler. */
25985 static int
25986 ia32_multipass_dfa_lookahead (void)
25988 switch (ix86_tune)
25990 case PROCESSOR_PENTIUM:
25991 return 2;
25993 case PROCESSOR_PENTIUMPRO:
25994 case PROCESSOR_K6:
25995 return 1;
25997 case PROCESSOR_BDVER1:
25998 case PROCESSOR_BDVER2:
25999 case PROCESSOR_BDVER3:
26000 case PROCESSOR_BDVER4:
26001 /* We use a lookahead value of 4 for BD both before and after the reload
26002 schedules. The plan is to include a value of 8 for -O3. */
26003 return 4;
26005 case PROCESSOR_CORE2:
26006 case PROCESSOR_NEHALEM:
26007 case PROCESSOR_SANDYBRIDGE:
26008 case PROCESSOR_HASWELL:
26009 case PROCESSOR_BONNELL:
26010 case PROCESSOR_SILVERMONT:
26011 case PROCESSOR_INTEL:
26012 /* Generally, we want haifa-sched:max_issue() to look ahead as far
26013 as the number of instructions that can be executed in a cycle, i.e.,
26014 issue_rate. I wonder why tuning for many CPUs does not do this. */
26015 if (reload_completed)
26016 return ix86_issue_rate ();
26017 /* Don't use lookahead for pre-reload schedule to save compile time. */
26018 return 0;
26020 default:
26021 return 0;
26025 /* Return true if target platform supports macro-fusion. */
26027 static bool
26028 ix86_macro_fusion_p ()
26030 return TARGET_FUSE_CMP_AND_BRANCH;
26033 /* Check whether the current microarchitecture supports macro fusion
26034 for the insn pair "CONDGEN + CONDJMP". Refer to the
26035 "Intel Architectures Optimization Reference Manual". */
26037 static bool
26038 ix86_macro_fusion_pair_p (rtx condgen, rtx condjmp)
26040 rtx src, dest;
26041 rtx single_set = single_set (condgen);
26042 enum rtx_code ccode;
26043 rtx compare_set = NULL_RTX, test_if, cond;
26044 rtx alu_set = NULL_RTX, addr = NULL_RTX;
26046 if (get_attr_type (condgen) != TYPE_TEST
26047 && get_attr_type (condgen) != TYPE_ICMP
26048 && get_attr_type (condgen) != TYPE_INCDEC
26049 && get_attr_type (condgen) != TYPE_ALU)
26050 return false;
26052 if (single_set == NULL_RTX
26053 && !TARGET_FUSE_ALU_AND_BRANCH)
26054 return false;
26056 if (single_set != NULL_RTX)
26057 compare_set = single_set;
26058 else
26060 int i;
26061 rtx pat = PATTERN (condgen);
26062 for (i = 0; i < XVECLEN (pat, 0); i++)
26063 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
26065 rtx set_src = SET_SRC (XVECEXP (pat, 0, i));
26066 if (GET_CODE (set_src) == COMPARE)
26067 compare_set = XVECEXP (pat, 0, i);
26068 else
26069 alu_set = XVECEXP (pat, 0, i);
26072 if (compare_set == NULL_RTX)
26073 return false;
26074 src = SET_SRC (compare_set);
26075 if (GET_CODE (src) != COMPARE)
26076 return false;
26078 /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not
26079 supported. */
26080 if ((MEM_P (XEXP (src, 0))
26081 && CONST_INT_P (XEXP (src, 1)))
26082 || (MEM_P (XEXP (src, 1))
26083 && CONST_INT_P (XEXP (src, 0))))
26084 return false;
26086 /* No fusion for RIP-relative address. */
26087 if (MEM_P (XEXP (src, 0)))
26088 addr = XEXP (XEXP (src, 0), 0);
26089 else if (MEM_P (XEXP (src, 1)))
26090 addr = XEXP (XEXP (src, 1), 0);
26092 if (addr) {
26093 ix86_address parts;
26094 int ok = ix86_decompose_address (addr, &parts);
26095 gcc_assert (ok);
26097 if (rip_relative_addr_p (&parts))
26098 return false;
26101 test_if = SET_SRC (pc_set (condjmp));
26102 cond = XEXP (test_if, 0);
26103 ccode = GET_CODE (cond);
26104 /* Check whether the conditional jump uses the Sign or Overflow flags. */
26105 if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
26106 && (ccode == GE
26107 || ccode == GT
26108 || ccode == LE
26109 || ccode == LT))
26110 return false;
26112 /* Return true for TYPE_TEST and TYPE_ICMP. */
26113 if (get_attr_type (condgen) == TYPE_TEST
26114 || get_attr_type (condgen) == TYPE_ICMP)
26115 return true;
26117 /* The following handles the case of macro-fusion for alu + jmp. */
26118 if (!TARGET_FUSE_ALU_AND_BRANCH || !alu_set)
26119 return false;
26121 /* No fusion for alu op with memory destination operand. */
26122 dest = SET_DEST (alu_set);
26123 if (MEM_P (dest))
26124 return false;
26126 /* Macro-fusion for inc/dec + unsigned conditional jump is not
26127 supported. */
26128 if (get_attr_type (condgen) == TYPE_INCDEC
26129 && (ccode == GEU
26130 || ccode == GTU
26131 || ccode == LEU
26132 || ccode == LTU))
26133 return false;
26135 return true;
26138 /* Try to reorder the ready list to take advantage of Atom's pipelined IMUL
26139 execution. It is applied if
26140 (1) an IMUL instruction is at the top of the list;
26141 (2) there is exactly one producer of an independent IMUL instruction in
26142 the ready list.
26143 Return the index of the IMUL producer if it was found and -1 otherwise. */
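/* For example, if the insn at the top of the ready list is an SImode imul
   and some other ready insn is the sole producer feeding a second,
   independent SImode imul, ix86_sched_reorder moves that producer to the
   top so the two multiplies can overlap in the pipelined IMUL unit.  */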
26144 static int
26145 do_reorder_for_imul (rtx *ready, int n_ready)
26147 rtx insn, set, insn1, insn2;
26148 sd_iterator_def sd_it;
26149 dep_t dep;
26150 int index = -1;
26151 int i;
26153 if (!TARGET_BONNELL)
26154 return index;
26156 /* Check that IMUL instruction is on the top of ready list. */
26157 insn = ready[n_ready - 1];
26158 set = single_set (insn);
26159 if (!set)
26160 return index;
26161 if (!(GET_CODE (SET_SRC (set)) == MULT
26162 && GET_MODE (SET_SRC (set)) == SImode))
26163 return index;
26165 /* Search for producer of independent IMUL instruction. */
26166 for (i = n_ready - 2; i >= 0; i--)
26168 insn = ready[i];
26169 if (!NONDEBUG_INSN_P (insn))
26170 continue;
26171 /* Skip IMUL instruction. */
26172 insn2 = PATTERN (insn);
26173 if (GET_CODE (insn2) == PARALLEL)
26174 insn2 = XVECEXP (insn2, 0, 0);
26175 if (GET_CODE (insn2) == SET
26176 && GET_CODE (SET_SRC (insn2)) == MULT
26177 && GET_MODE (SET_SRC (insn2)) == SImode)
26178 continue;
26180 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
26182 rtx con;
26183 con = DEP_CON (dep);
26184 if (!NONDEBUG_INSN_P (con))
26185 continue;
26186 insn1 = PATTERN (con);
26187 if (GET_CODE (insn1) == PARALLEL)
26188 insn1 = XVECEXP (insn1, 0, 0);
26190 if (GET_CODE (insn1) == SET
26191 && GET_CODE (SET_SRC (insn1)) == MULT
26192 && GET_MODE (SET_SRC (insn1)) == SImode)
26194 sd_iterator_def sd_it1;
26195 dep_t dep1;
26196 /* Check if there is no other dependee for IMUL. */
26197 index = i;
26198 FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
26200 rtx pro;
26201 pro = DEP_PRO (dep1);
26202 if (!NONDEBUG_INSN_P (pro))
26203 continue;
26204 if (pro != insn)
26205 index = -1;
26207 if (index >= 0)
26208 break;
26211 if (index >= 0)
26212 break;
26214 return index;
26217 /* Try to find the best candidate at the top of the ready list if two insns
26218 have the same priority - the candidate is best if its dependees were
26219 scheduled earlier. Applied to Silvermont only.
26220 Return true if the top 2 insns must be interchanged. */
26221 static bool
26222 swap_top_of_ready_list (rtx *ready, int n_ready)
26224 rtx top = ready[n_ready - 1];
26225 rtx next = ready[n_ready - 2];
26226 rtx set;
26227 sd_iterator_def sd_it;
26228 dep_t dep;
26229 int clock1 = -1;
26230 int clock2 = -1;
26231 #define INSN_TICK(INSN) (HID (INSN)->tick)
26233 if (!TARGET_SILVERMONT && !TARGET_INTEL)
26234 return false;
26236 if (!NONDEBUG_INSN_P (top))
26237 return false;
26238 if (!NONJUMP_INSN_P (top))
26239 return false;
26240 if (!NONDEBUG_INSN_P (next))
26241 return false;
26242 if (!NONJUMP_INSN_P (next))
26243 return false;
26244 set = single_set (top);
26245 if (!set)
26246 return false;
26247 set = single_set (next);
26248 if (!set)
26249 return false;
26251 if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next))
26253 if (INSN_PRIORITY (top) != INSN_PRIORITY (next))
26254 return false;
26255 /* Determine the winner more precisely. */
26256 FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep)
26258 rtx pro;
26259 pro = DEP_PRO (dep);
26260 if (!NONDEBUG_INSN_P (pro))
26261 continue;
26262 if (INSN_TICK (pro) > clock1)
26263 clock1 = INSN_TICK (pro);
26265 FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep)
26267 rtx pro;
26268 pro = DEP_PRO (dep);
26269 if (!NONDEBUG_INSN_P (pro))
26270 continue;
26271 if (INSN_TICK (pro) > clock2)
26272 clock2 = INSN_TICK (pro);
26275 if (clock1 == clock2)
26277 /* Determine winner - load must win. */
26278 enum attr_memory memory1, memory2;
26279 memory1 = get_attr_memory (top);
26280 memory2 = get_attr_memory (next);
26281 if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD)
26282 return true;
26284 return (bool) (clock2 < clock1);
26286 return false;
26287 #undef INSN_TICK
26290 /* Perform possible reordering of the ready list for Atom/Silvermont only.
26291 Return the issue rate. */
26292 static int
26293 ix86_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
26294 int clock_var)
26296 int issue_rate = -1;
26297 int n_ready = *pn_ready;
26298 int i;
26299 rtx insn;
26300 int index = -1;
26302 /* Set up issue rate. */
26303 issue_rate = ix86_issue_rate ();
26305 /* Do reordering for BONNELL/SILVERMONT only. */
26306 if (!TARGET_BONNELL && !TARGET_SILVERMONT && !TARGET_INTEL)
26307 return issue_rate;
26309 /* Nothing to do if ready list contains only 1 instruction. */
26310 if (n_ready <= 1)
26311 return issue_rate;
26313 /* Do reordering for the post-reload scheduler only. */
26314 if (!reload_completed)
26315 return issue_rate;
26317 if ((index = do_reorder_for_imul (ready, n_ready)) >= 0)
26319 if (sched_verbose > 1)
26320 fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n",
26321 INSN_UID (ready[index]));
26323 /* Put IMUL producer (ready[index]) at the top of ready list. */
26324 insn = ready[index];
26325 for (i = index; i < n_ready - 1; i++)
26326 ready[i] = ready[i + 1];
26327 ready[n_ready - 1] = insn;
26328 return issue_rate;
26330 if (clock_var != 0 && swap_top_of_ready_list (ready, n_ready))
26332 if (sched_verbose > 1)
26333 fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n",
26334 INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2]));
26335 /* Swap 2 top elements of ready list. */
26336 insn = ready[n_ready - 1];
26337 ready[n_ready - 1] = ready[n_ready - 2];
26338 ready[n_ready - 2] = insn;
26340 return issue_rate;
26343 static bool
26344 ix86_class_likely_spilled_p (reg_class_t);
26346 /* Return true if the lhs of the insn is a HW function argument register and
26347 set is_spilled to true if it is a likely spilled HW register. */
26348 static bool
26349 insn_is_function_arg (rtx insn, bool* is_spilled)
26351 rtx dst;
26353 if (!NONDEBUG_INSN_P (insn))
26354 return false;
26355 /* Call instructions are not movable; ignore them. */
26356 if (CALL_P (insn))
26357 return false;
26358 insn = PATTERN (insn);
26359 if (GET_CODE (insn) == PARALLEL)
26360 insn = XVECEXP (insn, 0, 0);
26361 if (GET_CODE (insn) != SET)
26362 return false;
26363 dst = SET_DEST (insn);
26364 if (REG_P (dst) && HARD_REGISTER_P (dst)
26365 && ix86_function_arg_regno_p (REGNO (dst)))
26367 /* Is it likely spilled HW register? */
26368 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
26369 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
26370 *is_spilled = true;
26371 return true;
26373 return false;
26376 /* Add output dependencies for a chain of adjacent function arguments, but only
26377 if there is a move to a likely spilled HW register. Return the first argument
26378 if at least one dependence was added, or NULL otherwise. */
26379 static rtx
26380 add_parameter_dependencies (rtx call, rtx head)
26382 rtx insn;
26383 rtx last = call;
26384 rtx first_arg = NULL;
26385 bool is_spilled = false;
26387 head = PREV_INSN (head);
26389 /* Find the argument-passing instruction nearest to the call. */
26390 while (true)
26392 last = PREV_INSN (last);
26393 if (last == head)
26394 return NULL;
26395 if (!NONDEBUG_INSN_P (last))
26396 continue;
26397 if (insn_is_function_arg (last, &is_spilled))
26398 break;
26399 return NULL;
26402 first_arg = last;
26403 while (true)
26405 insn = PREV_INSN (last);
26406 if (!INSN_P (insn))
26407 break;
26408 if (insn == head)
26409 break;
26410 if (!NONDEBUG_INSN_P (insn))
26412 last = insn;
26413 continue;
26415 if (insn_is_function_arg (insn, &is_spilled))
26417 /* Add an output dependence between two function arguments if the chain
26418 of output arguments contains likely spilled HW registers. */
26419 if (is_spilled)
26420 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26421 first_arg = last = insn;
26423 else
26424 break;
26426 if (!is_spilled)
26427 return NULL;
26428 return first_arg;
26431 /* Add output or anti dependency from insn to first_arg to restrict its code
26432 motion. */
26433 static void
26434 avoid_func_arg_motion (rtx first_arg, rtx insn)
26436 rtx set;
26437 rtx tmp;
26439 set = single_set (insn);
26440 if (!set)
26441 return;
26442 tmp = SET_DEST (set);
26443 if (REG_P (tmp))
26445 /* Add output dependency to the first function argument. */
26446 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26447 return;
26449 /* Add anti dependency. */
26450 add_dependence (first_arg, insn, REG_DEP_ANTI);
26453 /* Avoid cross-block motion of a function argument by adding a dependency
26454 from the first non-jump instruction in bb. */
26455 static void
26456 add_dependee_for_func_arg (rtx arg, basic_block bb)
26458 rtx insn = BB_END (bb);
26460 while (insn)
26462 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
26464 rtx set = single_set (insn);
26465 if (set)
26467 avoid_func_arg_motion (arg, insn);
26468 return;
26471 if (insn == BB_HEAD (bb))
26472 return;
26473 insn = PREV_INSN (insn);
26477 /* Hook for pre-reload schedule - avoid motion of function arguments
26478 passed in likely spilled HW registers. */
26479 static void
26480 ix86_dependencies_evaluation_hook (rtx head, rtx tail)
26482 rtx insn;
26483 rtx first_arg = NULL;
26484 if (reload_completed)
26485 return;
26486 while (head != tail && DEBUG_INSN_P (head))
26487 head = NEXT_INSN (head);
26488 for (insn = tail; insn != head; insn = PREV_INSN (insn))
26489 if (INSN_P (insn) && CALL_P (insn))
26491 first_arg = add_parameter_dependencies (insn, head);
26492 if (first_arg)
26494 /* Add a dependee for the first argument to predecessors, but only
26495 if the region contains more than one block. */
26496 basic_block bb = BLOCK_FOR_INSN (insn);
26497 int rgn = CONTAINING_RGN (bb->index);
26498 int nr_blks = RGN_NR_BLOCKS (rgn);
26499 /* Skip trivial regions and region head blocks that can have
26500 predecessors outside of region. */
26501 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
26503 edge e;
26504 edge_iterator ei;
26505 /* Assume that the region is an SCC, i.e. all immediate predecessors
26506 of a non-head block are in the same region. */
26507 FOR_EACH_EDGE (e, ei, bb->preds)
26509 /* Avoid creating loop-carried dependencies by using
26510 the topological ordering in the region. */
26511 if (BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
26512 add_dependee_for_func_arg (first_arg, e->src);
26515 insn = first_arg;
26516 if (insn == head)
26517 break;
26520 else if (first_arg)
26521 avoid_func_arg_motion (first_arg, insn);
26524 /* Hook for the pre-reload scheduler - set the priority of moves from likely
26525 spilled HW registers to the maximum, to schedule them as soon as possible.
26526 These are moves from function argument registers at the top of the function
26527 entry and moves from function return value registers after a call. */
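/* Illustrative example, not from the original source: on x86-64 the incoming
   copy of the first argument register, e.g.

       (set (reg:SI <pseudo>) (reg:SI di))

   at the top of a function, or the copy of the ax return-value register into
   a pseudo right after a call, satisfies the test below (hard register, not
   fixed, likely spilled class) and is bumped to sched_max_insns_priority.  */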
26528 static int
26529 ix86_adjust_priority (rtx insn, int priority)
26531 rtx set;
26533 if (reload_completed)
26534 return priority;
26536 if (!NONDEBUG_INSN_P (insn))
26537 return priority;
26539 set = single_set (insn);
26540 if (set)
26542 rtx tmp = SET_SRC (set);
26543 if (REG_P (tmp)
26544 && HARD_REGISTER_P (tmp)
26545 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
26546 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
26547 return current_sched_info->sched_max_insns_priority;
26550 return priority;
26553 /* Model the decoder of Core 2/i7.
26554 The hooks below, used for multipass scheduling (see haifa-sched.c:max_issue),
26555 track the instruction fetch block boundaries and make sure that long
26556 (9+ byte) instructions are assigned to decoder D0. */
26558 /* Maximum length of an insn that can be handled by
26559 a secondary decoder unit. '8' for Core 2/i7. */
26560 static int core2i7_secondary_decoder_max_insn_size;
26562 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
26563 '16' for Core 2/i7. */
26564 static int core2i7_ifetch_block_size;
26566 /* Maximum number of instructions decoder can handle per cycle.
26567 '6' for Core 2/i7. */
26568 static int core2i7_ifetch_block_max_insns;
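/* Worked example, not part of the original source: with the Core 2/i7
   parameters set up in ix86_sched_init_global (secondary decoder limit 8,
   ifetch block 16 bytes, at most 6 insns per block), an 11-byte insn can
   only be issued as the first insn of a cycle, and once 13 bytes of the
   16-byte ifetch block have been consumed, any 4-byte insn is filtered out
   of the ready list for the remainder of the cycle.  */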
26570 typedef struct ix86_first_cycle_multipass_data_ *
26571 ix86_first_cycle_multipass_data_t;
26572 typedef const struct ix86_first_cycle_multipass_data_ *
26573 const_ix86_first_cycle_multipass_data_t;
26575 /* A variable to store target state across calls to max_issue within
26576 one cycle. */
26577 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
26578 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
26580 /* Initialize DATA. */
26581 static void
26582 core2i7_first_cycle_multipass_init (void *_data)
26584 ix86_first_cycle_multipass_data_t data
26585 = (ix86_first_cycle_multipass_data_t) _data;
26587 data->ifetch_block_len = 0;
26588 data->ifetch_block_n_insns = 0;
26589 data->ready_try_change = NULL;
26590 data->ready_try_change_size = 0;
26593 /* Advancing the cycle; reset ifetch block counts. */
26594 static void
26595 core2i7_dfa_post_advance_cycle (void)
26597 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
26599 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
26601 data->ifetch_block_len = 0;
26602 data->ifetch_block_n_insns = 0;
26605 static int min_insn_size (rtx);
26607 /* Filter out insns from ready_try that the core will not be able to issue
26608 on current cycle due to decoder. */
26609 static void
26610 core2i7_first_cycle_multipass_filter_ready_try
26611 (const_ix86_first_cycle_multipass_data_t data,
26612 char *ready_try, int n_ready, bool first_cycle_insn_p)
26614 while (n_ready--)
26616 rtx insn;
26617 int insn_size;
26619 if (ready_try[n_ready])
26620 continue;
26622 insn = get_ready_element (n_ready);
26623 insn_size = min_insn_size (insn);
26625 if (/* If this insn is too long for a secondary decoder ... */
26626 (!first_cycle_insn_p
26627 && insn_size > core2i7_secondary_decoder_max_insn_size)
26628 /* ... or it would not fit into the ifetch block ... */
26629 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
26630 /* ... or the decoder is full already ... */
26631 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
26632 /* ... mask the insn out. */
26634 ready_try[n_ready] = 1;
26636 if (data->ready_try_change)
26637 bitmap_set_bit (data->ready_try_change, n_ready);
26642 /* Prepare for a new round of multipass lookahead scheduling. */
26643 static void
26644 core2i7_first_cycle_multipass_begin (void *_data, char *ready_try, int n_ready,
26645 bool first_cycle_insn_p)
26647 ix86_first_cycle_multipass_data_t data
26648 = (ix86_first_cycle_multipass_data_t) _data;
26649 const_ix86_first_cycle_multipass_data_t prev_data
26650 = ix86_first_cycle_multipass_data;
26652 /* Restore the state from the end of the previous round. */
26653 data->ifetch_block_len = prev_data->ifetch_block_len;
26654 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
26656 /* Filter instructions that cannot be issued on current cycle due to
26657 decoder restrictions. */
26658 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
26659 first_cycle_insn_p);
26662 /* INSN is being issued in current solution. Account for its impact on
26663 the decoder model. */
26664 static void
26665 core2i7_first_cycle_multipass_issue (void *_data, char *ready_try, int n_ready,
26666 rtx insn, const void *_prev_data)
26668 ix86_first_cycle_multipass_data_t data
26669 = (ix86_first_cycle_multipass_data_t) _data;
26670 const_ix86_first_cycle_multipass_data_t prev_data
26671 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
26673 int insn_size = min_insn_size (insn);
26675 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
26676 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
26677 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
26678 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
26680 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
26681 if (!data->ready_try_change)
26683 data->ready_try_change = sbitmap_alloc (n_ready);
26684 data->ready_try_change_size = n_ready;
26686 else if (data->ready_try_change_size < n_ready)
26688 data->ready_try_change = sbitmap_resize (data->ready_try_change,
26689 n_ready, 0);
26690 data->ready_try_change_size = n_ready;
26692 bitmap_clear (data->ready_try_change);
26694 /* Filter out insns from ready_try that the core will not be able to issue
26695 on current cycle due to decoder. */
26696 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
26697 false);
26700 /* Revert the effect on ready_try. */
26701 static void
26702 core2i7_first_cycle_multipass_backtrack (const void *_data,
26703 char *ready_try,
26704 int n_ready ATTRIBUTE_UNUSED)
26706 const_ix86_first_cycle_multipass_data_t data
26707 = (const_ix86_first_cycle_multipass_data_t) _data;
26708 unsigned int i = 0;
26709 sbitmap_iterator sbi;
26711 gcc_assert (bitmap_last_set_bit (data->ready_try_change) < n_ready);
26712 EXECUTE_IF_SET_IN_BITMAP (data->ready_try_change, 0, i, sbi)
26714 ready_try[i] = 0;
26718 /* Save the result of multipass lookahead scheduling for the next round. */
26719 static void
26720 core2i7_first_cycle_multipass_end (const void *_data)
26722 const_ix86_first_cycle_multipass_data_t data
26723 = (const_ix86_first_cycle_multipass_data_t) _data;
26724 ix86_first_cycle_multipass_data_t next_data
26725 = ix86_first_cycle_multipass_data;
26727 if (data != NULL)
26729 next_data->ifetch_block_len = data->ifetch_block_len;
26730 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
26734 /* Deallocate target data. */
26735 static void
26736 core2i7_first_cycle_multipass_fini (void *_data)
26738 ix86_first_cycle_multipass_data_t data
26739 = (ix86_first_cycle_multipass_data_t) _data;
26741 if (data->ready_try_change)
26743 sbitmap_free (data->ready_try_change);
26744 data->ready_try_change = NULL;
26745 data->ready_try_change_size = 0;
26749 /* Prepare for scheduling pass. */
26750 static void
26751 ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
26752 int verbose ATTRIBUTE_UNUSED,
26753 int max_uid ATTRIBUTE_UNUSED)
26755 /* Install scheduling hooks for current CPU. Some of these hooks are used
26756 in time-critical parts of the scheduler, so we only set them up when
26757 they are actually used. */
26758 switch (ix86_tune)
26760 case PROCESSOR_CORE2:
26761 case PROCESSOR_NEHALEM:
26762 case PROCESSOR_SANDYBRIDGE:
26763 case PROCESSOR_HASWELL:
26764 /* Do not perform multipass scheduling for pre-reload schedule
26765 to save compile time. */
26766 if (reload_completed)
26768 targetm.sched.dfa_post_advance_cycle
26769 = core2i7_dfa_post_advance_cycle;
26770 targetm.sched.first_cycle_multipass_init
26771 = core2i7_first_cycle_multipass_init;
26772 targetm.sched.first_cycle_multipass_begin
26773 = core2i7_first_cycle_multipass_begin;
26774 targetm.sched.first_cycle_multipass_issue
26775 = core2i7_first_cycle_multipass_issue;
26776 targetm.sched.first_cycle_multipass_backtrack
26777 = core2i7_first_cycle_multipass_backtrack;
26778 targetm.sched.first_cycle_multipass_end
26779 = core2i7_first_cycle_multipass_end;
26780 targetm.sched.first_cycle_multipass_fini
26781 = core2i7_first_cycle_multipass_fini;
26783 /* Set decoder parameters. */
26784 core2i7_secondary_decoder_max_insn_size = 8;
26785 core2i7_ifetch_block_size = 16;
26786 core2i7_ifetch_block_max_insns = 6;
26787 break;
26789 /* ... Fall through ... */
26790 default:
26791 targetm.sched.dfa_post_advance_cycle = NULL;
26792 targetm.sched.first_cycle_multipass_init = NULL;
26793 targetm.sched.first_cycle_multipass_begin = NULL;
26794 targetm.sched.first_cycle_multipass_issue = NULL;
26795 targetm.sched.first_cycle_multipass_backtrack = NULL;
26796 targetm.sched.first_cycle_multipass_end = NULL;
26797 targetm.sched.first_cycle_multipass_fini = NULL;
26798 break;
26803 /* Compute the alignment given to a constant that is being placed in memory.
26804 EXP is the constant and ALIGN is the alignment that the object would
26805 ordinarily have.
26806 The value of this function is used instead of that alignment to align
26807 the object. */
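/* Illustrative note, not part of the original comment: a DFmode REAL_CST
   that would ordinarily get 32-bit alignment is bumped to 64 bits below,
   and a string constant whose TREE_STRING_LENGTH is at least 31 is
   word-aligned unless we are optimizing for size.  */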
26810 ix86_constant_alignment (tree exp, int align)
26812 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
26813 || TREE_CODE (exp) == INTEGER_CST)
26815 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
26816 return 64;
26817 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
26818 return 128;
26820 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
26821 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
26822 return BITS_PER_WORD;
26824 return align;
26827 /* Compute the alignment for a static variable.
26828 TYPE is the data type, and ALIGN is the alignment that
26829 the object would ordinarily have. The value of this function is used
26830 instead of that alignment to align the object. */
26833 ix86_data_alignment (tree type, int align, bool opt)
26835 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
26836 for symbols from other compilation units or symbols that don't need
26837 to bind locally. In order to preserve some ABI compatibility with
26838 those compilers, ensure we don't decrease alignment from what we
26839 used to assume. */
26841 int max_align_compat
26842 = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
26844 /* A data structure, equal or greater than the size of a cache line
26845 (64 bytes in the Pentium 4 and other recent Intel processors, including
26846 processors based on Intel Core microarchitecture) should be aligned
26847 so that its base address is a multiple of a cache line size. */
26849 int max_align
26850 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
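/* Worked example, not part of the original source: with a 64-byte prefetch
   block this evaluates to 512 bits, so a sufficiently large aggregate can
   be aligned to a full cache line, capped by MAX_OFILE_ALIGNMENT.  */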
26852 if (max_align < BITS_PER_WORD)
26853 max_align = BITS_PER_WORD;
26855 if (opt
26856 && AGGREGATE_TYPE_P (type)
26857 && TYPE_SIZE (type)
26858 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
26860 if ((TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align_compat
26861 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
26862 && align < max_align_compat)
26863 align = max_align_compat;
26864 if ((TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
26865 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
26866 && align < max_align)
26867 align = max_align;
26870 /* The x86-64 ABI requires arrays of at least 16 bytes to be aligned
26871 to a 16-byte boundary. */
26872 if (TARGET_64BIT)
26874 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
26875 && TYPE_SIZE (type)
26876 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
26877 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
26878 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
26879 return 128;
26882 if (!opt)
26883 return align;
26885 if (TREE_CODE (type) == ARRAY_TYPE)
26887 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
26888 return 64;
26889 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
26890 return 128;
26892 else if (TREE_CODE (type) == COMPLEX_TYPE)
26895 if (TYPE_MODE (type) == DCmode && align < 64)
26896 return 64;
26897 if ((TYPE_MODE (type) == XCmode
26898 || TYPE_MODE (type) == TCmode) && align < 128)
26899 return 128;
26901 else if ((TREE_CODE (type) == RECORD_TYPE
26902 || TREE_CODE (type) == UNION_TYPE
26903 || TREE_CODE (type) == QUAL_UNION_TYPE)
26904 && TYPE_FIELDS (type))
26906 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
26907 return 64;
26908 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
26909 return 128;
26911 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
26912 || TREE_CODE (type) == INTEGER_TYPE)
26914 if (TYPE_MODE (type) == DFmode && align < 64)
26915 return 64;
26916 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
26917 return 128;
26920 return align;
26923 /* Compute the alignment for a local variable or a stack slot. EXP is
26924 the data type or decl itself, MODE is the widest mode available and
26925 ALIGN is the alignment that the object would ordinarily have. The
26926 value of this macro is used instead of that alignment to align the
26927 object. */
26929 unsigned int
26930 ix86_local_alignment (tree exp, enum machine_mode mode,
26931 unsigned int align)
26933 tree type, decl;
26935 if (exp && DECL_P (exp))
26937 type = TREE_TYPE (exp);
26938 decl = exp;
26940 else
26942 type = exp;
26943 decl = NULL;
26946 /* Don't do dynamic stack realignment for long long objects with
26947 -mpreferred-stack-boundary=2. */
26948 if (!TARGET_64BIT
26949 && align == 64
26950 && ix86_preferred_stack_boundary < 64
26951 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
26952 && (!type || !TYPE_USER_ALIGN (type))
26953 && (!decl || !DECL_USER_ALIGN (decl)))
26954 align = 32;
26956 /* If TYPE is NULL, we are allocating a stack slot for a caller-save
26957 register in MODE. Return the larger of the XFmode
26958 and DFmode alignments. */
26959 if (!type)
26961 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
26962 align = GET_MODE_ALIGNMENT (DFmode);
26963 return align;
26966 /* The x86-64 ABI requires arrays of at least 16 bytes to be aligned
26967 to a 16-byte boundary. The exact wording is:
26969 An array uses the same alignment as its elements, except that a local or
26970 global array variable of length at least 16 bytes or
26971 a C99 variable-length array variable always has alignment of at least 16 bytes.
26973 This was added to allow use of aligned SSE instructions on arrays. The
26974 rule is meant for static storage (where the compiler cannot do the analysis
26975 by itself). We follow it for automatic variables only when convenient:
26976 we fully control everything in the function being compiled, and functions
26977 from other units cannot rely on the alignment.
26979 Exclude the va_list type. It is the common case of a local array where
26980 we cannot benefit from the alignment.
26982 TODO: Probably one should optimize for size only when var is not escaping. */
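/* Illustrative example, not part of the original comment: a local
   "double buf[4]" (32 bytes) in a function compiled for speed with SSE on
   x86-64 passes the size check below and is therefore given 128-bit
   alignment, allowing aligned SSE loads and stores on it.  */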
26983 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
26984 && TARGET_SSE)
26986 if (AGGREGATE_TYPE_P (type)
26987 && (va_list_type_node == NULL_TREE
26988 || (TYPE_MAIN_VARIANT (type)
26989 != TYPE_MAIN_VARIANT (va_list_type_node)))
26990 && TYPE_SIZE (type)
26991 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
26992 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
26993 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
26994 return 128;
26996 if (TREE_CODE (type) == ARRAY_TYPE)
26998 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
26999 return 64;
27000 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
27001 return 128;
27003 else if (TREE_CODE (type) == COMPLEX_TYPE)
27005 if (TYPE_MODE (type) == DCmode && align < 64)
27006 return 64;
27007 if ((TYPE_MODE (type) == XCmode
27008 || TYPE_MODE (type) == TCmode) && align < 128)
27009 return 128;
27011 else if ((TREE_CODE (type) == RECORD_TYPE
27012 || TREE_CODE (type) == UNION_TYPE
27013 || TREE_CODE (type) == QUAL_UNION_TYPE)
27014 && TYPE_FIELDS (type))
27016 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
27017 return 64;
27018 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
27019 return 128;
27021 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
27022 || TREE_CODE (type) == INTEGER_TYPE)
27025 if (TYPE_MODE (type) == DFmode && align < 64)
27026 return 64;
27027 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
27028 return 128;
27030 return align;
27033 /* Compute the minimum required alignment for dynamic stack realignment
27034 purposes for a local variable, parameter or a stack slot. EXP is
27035 the data type or decl itself, MODE is its mode and ALIGN is the
27036 alignment that the object would ordinarily have. */
27038 unsigned int
27039 ix86_minimum_alignment (tree exp, enum machine_mode mode,
27040 unsigned int align)
27042 tree type, decl;
27044 if (exp && DECL_P (exp))
27046 type = TREE_TYPE (exp);
27047 decl = exp;
27049 else
27051 type = exp;
27052 decl = NULL;
27055 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
27056 return align;
27058 /* Don't do dynamic stack realignment for long long objects with
27059 -mpreferred-stack-boundary=2. */
27060 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
27061 && (!type || !TYPE_USER_ALIGN (type))
27062 && (!decl || !DECL_USER_ALIGN (decl)))
27063 return 32;
27065 return align;
27068 /* Find a location for the static chain incoming to a nested function.
27069 This is a register, unless all free registers are used by arguments. */
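/* Summary of the cases handled below, derived from the code for quick
   reference: 64-bit targets always use R10; 32-bit targets default to ECX;
   fastcall and thiscall functions use EAX; and for regparm(3) functions no
   call-clobbered register is free, so the chain goes through the stack and
   ESI via an alternate entry point (see the detailed comment below).  */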
27071 static rtx
27072 ix86_static_chain (const_tree fndecl, bool incoming_p)
27074 unsigned regno;
27076 if (!DECL_STATIC_CHAIN (fndecl))
27077 return NULL;
27079 if (TARGET_64BIT)
27081 /* We always use R10 in 64-bit mode. */
27082 regno = R10_REG;
27084 else
27086 tree fntype;
27087 unsigned int ccvt;
27089 /* By default in 32-bit mode we use ECX to pass the static chain. */
27090 regno = CX_REG;
27092 fntype = TREE_TYPE (fndecl);
27093 ccvt = ix86_get_callcvt (fntype);
27094 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
27096 /* Fastcall functions use ecx/edx for arguments, which leaves
27097 us with EAX for the static chain.
27098 Thiscall functions use ecx for arguments, which also
27099 leaves us with EAX for the static chain. */
27100 regno = AX_REG;
27102 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
27104 /* Thiscall functions use ecx for arguments, which leaves
27105 us with EAX and EDX for the static chain.
27106 For ABI compatibility we use EAX. */
27107 regno = AX_REG;
27109 else if (ix86_function_regparm (fntype, fndecl) == 3)
27111 /* For regparm 3, we have no free call-clobbered registers in
27112 which to store the static chain. In order to implement this,
27113 we have the trampoline push the static chain to the stack.
27114 However, we can't push a value below the return address when
27115 we call the nested function directly, so we have to use an
27116 alternate entry point. For this we use ESI, and have the
27117 alternate entry point push ESI, so that things appear the
27118 same once we're executing the nested function. */
27119 if (incoming_p)
27121 if (fndecl == current_function_decl)
27122 ix86_static_chain_on_stack = true;
27123 return gen_frame_mem (SImode,
27124 plus_constant (Pmode,
27125 arg_pointer_rtx, -8));
27127 regno = SI_REG;
27131 return gen_rtx_REG (Pmode, regno);
27134 /* Emit RTL insns to initialize the variable parts of a trampoline.
27135 FNDECL is the decl of the target address; M_TRAMP is a MEM for
27136 the trampoline, and CHAIN_VALUE is an RTX for the static chain
27137 to be passed to the target function. */
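/* For reference, reconstructed from the opcodes emitted below (not part of
   the original comment), the 64-bit trampoline is laid out as:

       49 bb <imm64>    movabs $fnaddr, %r11  (or 41 bb <imm32>, movl, when a
                                               32-bit address suffices)
       49 ba <imm64>    movabs $chain, %r10   (or 41 ba <imm32>, movl, likewise)
       49 ff e3 90      jmp *%r11; nop

   The 32-bit variant instead emits a mov-immediate or push for the static
   chain followed by a relative jmp (opcode e9) to the target function.  */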
27139 static void
27140 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
27142 rtx mem, fnaddr;
27143 int opcode;
27144 int offset = 0;
27146 fnaddr = XEXP (DECL_RTL (fndecl), 0);
27148 if (TARGET_64BIT)
27150 int size;
27152 /* Load the function address into r11. Try to load the address using
27153 the shorter movl instead of movabs. We may want to support
27154 movq for kernel mode, but the kernel does not use trampolines at
27155 the moment. FNADDR is a 32-bit address and may not be in
27156 DImode when ptr_mode == SImode. Always use movl in this
27157 case. */
27158 if (ptr_mode == SImode
27159 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
27161 fnaddr = copy_addr_to_reg (fnaddr);
27163 mem = adjust_address (m_tramp, HImode, offset);
27164 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
27166 mem = adjust_address (m_tramp, SImode, offset + 2);
27167 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
27168 offset += 6;
27170 else
27172 mem = adjust_address (m_tramp, HImode, offset);
27173 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
27175 mem = adjust_address (m_tramp, DImode, offset + 2);
27176 emit_move_insn (mem, fnaddr);
27177 offset += 10;
27180 /* Load static chain using movabs to r10. Use the shorter movl
27181 instead of movabs when ptr_mode == SImode. */
27182 if (ptr_mode == SImode)
27184 opcode = 0xba41;
27185 size = 6;
27187 else
27189 opcode = 0xba49;
27190 size = 10;
27193 mem = adjust_address (m_tramp, HImode, offset);
27194 emit_move_insn (mem, gen_int_mode (opcode, HImode));
27196 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
27197 emit_move_insn (mem, chain_value);
27198 offset += size;
27200 /* Jump to r11; the last (unused) byte is a nop, only there to
27201 pad the write out to a single 32-bit store. */
27202 mem = adjust_address (m_tramp, SImode, offset);
27203 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
27204 offset += 4;
27206 else
27208 rtx disp, chain;
27210 /* Depending on the static chain location, either load a register
27211 with a constant, or push the constant to the stack. All of the
27212 instructions are the same size. */
27213 chain = ix86_static_chain (fndecl, true);
27214 if (REG_P (chain))
27216 switch (REGNO (chain))
27218 case AX_REG:
27219 opcode = 0xb8; break;
27220 case CX_REG:
27221 opcode = 0xb9; break;
27222 default:
27223 gcc_unreachable ();
27226 else
27227 opcode = 0x68;
27229 mem = adjust_address (m_tramp, QImode, offset);
27230 emit_move_insn (mem, gen_int_mode (opcode, QImode));
27232 mem = adjust_address (m_tramp, SImode, offset + 1);
27233 emit_move_insn (mem, chain_value);
27234 offset += 5;
27236 mem = adjust_address (m_tramp, QImode, offset);
27237 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
27239 mem = adjust_address (m_tramp, SImode, offset + 1);
27241 /* Compute offset from the end of the jmp to the target function.
27242 In the case in which the trampoline stores the static chain on
27243 the stack, we need to skip the first insn which pushes the
27244 (call-saved) register static chain; this push is 1 byte. */
27245 offset += 5;
27246 disp = expand_binop (SImode, sub_optab, fnaddr,
27247 plus_constant (Pmode, XEXP (m_tramp, 0),
27248 offset - (MEM_P (chain) ? 1 : 0)),
27249 NULL_RTX, 1, OPTAB_DIRECT);
27250 emit_move_insn (mem, disp);
27253 gcc_assert (offset <= TRAMPOLINE_SIZE);
27255 #ifdef HAVE_ENABLE_EXECUTE_STACK
27256 #ifdef CHECK_EXECUTE_STACK_ENABLED
27257 if (CHECK_EXECUTE_STACK_ENABLED)
27258 #endif
27259 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
27260 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
27261 #endif
27264 /* The following file contains several enumerations and data structures
27265 built from the definitions in i386-builtin-types.def. */
27267 #include "i386-builtin-types.inc"
27269 /* Table for the ix86 builtin non-function types. */
27270 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
27272 /* Retrieve an element from the above table, building some of
27273 the types lazily. */
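/* Illustrative note, not part of the original comment: a vector type code
   is expanded on demand via build_vector_type_for_mode from its recorded
   element type and machine mode, and a pointer type code via
   build_pointer_type (adding a const qualifier where required), so only the
   types actually referenced by a translation unit are ever constructed.  */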
27275 static tree
27276 ix86_get_builtin_type (enum ix86_builtin_type tcode)
27278 unsigned int index;
27279 tree type, itype;
27281 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
27283 type = ix86_builtin_type_tab[(int) tcode];
27284 if (type != NULL)
27285 return type;
27287 gcc_assert (tcode > IX86_BT_LAST_PRIM);
27288 if (tcode <= IX86_BT_LAST_VECT)
27290 enum machine_mode mode;
27292 index = tcode - IX86_BT_LAST_PRIM - 1;
27293 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
27294 mode = ix86_builtin_type_vect_mode[index];
27296 type = build_vector_type_for_mode (itype, mode);
27298 else
27300 int quals;
27302 index = tcode - IX86_BT_LAST_VECT - 1;
27303 if (tcode <= IX86_BT_LAST_PTR)
27304 quals = TYPE_UNQUALIFIED;
27305 else
27306 quals = TYPE_QUAL_CONST;
27308 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
27309 if (quals != TYPE_UNQUALIFIED)
27310 itype = build_qualified_type (itype, quals);
27312 type = build_pointer_type (itype);
27315 ix86_builtin_type_tab[(int) tcode] = type;
27316 return type;
27319 /* Table for the ix86 builtin function types. */
27320 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
27322 /* Retrieve an element from the above table, building some of
27323 the types lazily. */
27325 static tree
27326 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
27328 tree type;
27330 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
27332 type = ix86_builtin_func_type_tab[(int) tcode];
27333 if (type != NULL)
27334 return type;
27336 if (tcode <= IX86_BT_LAST_FUNC)
27338 unsigned start = ix86_builtin_func_start[(int) tcode];
27339 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
27340 tree rtype, atype, args = void_list_node;
27341 unsigned i;
27343 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
27344 for (i = after - 1; i > start; --i)
27346 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
27347 args = tree_cons (NULL, atype, args);
27350 type = build_function_type (rtype, args);
27352 else
27354 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
27355 enum ix86_builtin_func_type icode;
27357 icode = ix86_builtin_func_alias_base[index];
27358 type = ix86_get_builtin_func_type (icode);
27361 ix86_builtin_func_type_tab[(int) tcode] = type;
27362 return type;
27366 /* Codes for all the SSE/MMX builtins. */
27367 enum ix86_builtins
27369 IX86_BUILTIN_ADDPS,
27370 IX86_BUILTIN_ADDSS,
27371 IX86_BUILTIN_DIVPS,
27372 IX86_BUILTIN_DIVSS,
27373 IX86_BUILTIN_MULPS,
27374 IX86_BUILTIN_MULSS,
27375 IX86_BUILTIN_SUBPS,
27376 IX86_BUILTIN_SUBSS,
27378 IX86_BUILTIN_CMPEQPS,
27379 IX86_BUILTIN_CMPLTPS,
27380 IX86_BUILTIN_CMPLEPS,
27381 IX86_BUILTIN_CMPGTPS,
27382 IX86_BUILTIN_CMPGEPS,
27383 IX86_BUILTIN_CMPNEQPS,
27384 IX86_BUILTIN_CMPNLTPS,
27385 IX86_BUILTIN_CMPNLEPS,
27386 IX86_BUILTIN_CMPNGTPS,
27387 IX86_BUILTIN_CMPNGEPS,
27388 IX86_BUILTIN_CMPORDPS,
27389 IX86_BUILTIN_CMPUNORDPS,
27390 IX86_BUILTIN_CMPEQSS,
27391 IX86_BUILTIN_CMPLTSS,
27392 IX86_BUILTIN_CMPLESS,
27393 IX86_BUILTIN_CMPNEQSS,
27394 IX86_BUILTIN_CMPNLTSS,
27395 IX86_BUILTIN_CMPNLESS,
27396 IX86_BUILTIN_CMPORDSS,
27397 IX86_BUILTIN_CMPUNORDSS,
27399 IX86_BUILTIN_COMIEQSS,
27400 IX86_BUILTIN_COMILTSS,
27401 IX86_BUILTIN_COMILESS,
27402 IX86_BUILTIN_COMIGTSS,
27403 IX86_BUILTIN_COMIGESS,
27404 IX86_BUILTIN_COMINEQSS,
27405 IX86_BUILTIN_UCOMIEQSS,
27406 IX86_BUILTIN_UCOMILTSS,
27407 IX86_BUILTIN_UCOMILESS,
27408 IX86_BUILTIN_UCOMIGTSS,
27409 IX86_BUILTIN_UCOMIGESS,
27410 IX86_BUILTIN_UCOMINEQSS,
27412 IX86_BUILTIN_CVTPI2PS,
27413 IX86_BUILTIN_CVTPS2PI,
27414 IX86_BUILTIN_CVTSI2SS,
27415 IX86_BUILTIN_CVTSI642SS,
27416 IX86_BUILTIN_CVTSS2SI,
27417 IX86_BUILTIN_CVTSS2SI64,
27418 IX86_BUILTIN_CVTTPS2PI,
27419 IX86_BUILTIN_CVTTSS2SI,
27420 IX86_BUILTIN_CVTTSS2SI64,
27422 IX86_BUILTIN_MAXPS,
27423 IX86_BUILTIN_MAXSS,
27424 IX86_BUILTIN_MINPS,
27425 IX86_BUILTIN_MINSS,
27427 IX86_BUILTIN_LOADUPS,
27428 IX86_BUILTIN_STOREUPS,
27429 IX86_BUILTIN_MOVSS,
27431 IX86_BUILTIN_MOVHLPS,
27432 IX86_BUILTIN_MOVLHPS,
27433 IX86_BUILTIN_LOADHPS,
27434 IX86_BUILTIN_LOADLPS,
27435 IX86_BUILTIN_STOREHPS,
27436 IX86_BUILTIN_STORELPS,
27438 IX86_BUILTIN_MASKMOVQ,
27439 IX86_BUILTIN_MOVMSKPS,
27440 IX86_BUILTIN_PMOVMSKB,
27442 IX86_BUILTIN_MOVNTPS,
27443 IX86_BUILTIN_MOVNTQ,
27445 IX86_BUILTIN_LOADDQU,
27446 IX86_BUILTIN_STOREDQU,
27448 IX86_BUILTIN_PACKSSWB,
27449 IX86_BUILTIN_PACKSSDW,
27450 IX86_BUILTIN_PACKUSWB,
27452 IX86_BUILTIN_PADDB,
27453 IX86_BUILTIN_PADDW,
27454 IX86_BUILTIN_PADDD,
27455 IX86_BUILTIN_PADDQ,
27456 IX86_BUILTIN_PADDSB,
27457 IX86_BUILTIN_PADDSW,
27458 IX86_BUILTIN_PADDUSB,
27459 IX86_BUILTIN_PADDUSW,
27460 IX86_BUILTIN_PSUBB,
27461 IX86_BUILTIN_PSUBW,
27462 IX86_BUILTIN_PSUBD,
27463 IX86_BUILTIN_PSUBQ,
27464 IX86_BUILTIN_PSUBSB,
27465 IX86_BUILTIN_PSUBSW,
27466 IX86_BUILTIN_PSUBUSB,
27467 IX86_BUILTIN_PSUBUSW,
27469 IX86_BUILTIN_PAND,
27470 IX86_BUILTIN_PANDN,
27471 IX86_BUILTIN_POR,
27472 IX86_BUILTIN_PXOR,
27474 IX86_BUILTIN_PAVGB,
27475 IX86_BUILTIN_PAVGW,
27477 IX86_BUILTIN_PCMPEQB,
27478 IX86_BUILTIN_PCMPEQW,
27479 IX86_BUILTIN_PCMPEQD,
27480 IX86_BUILTIN_PCMPGTB,
27481 IX86_BUILTIN_PCMPGTW,
27482 IX86_BUILTIN_PCMPGTD,
27484 IX86_BUILTIN_PMADDWD,
27486 IX86_BUILTIN_PMAXSW,
27487 IX86_BUILTIN_PMAXUB,
27488 IX86_BUILTIN_PMINSW,
27489 IX86_BUILTIN_PMINUB,
27491 IX86_BUILTIN_PMULHUW,
27492 IX86_BUILTIN_PMULHW,
27493 IX86_BUILTIN_PMULLW,
27495 IX86_BUILTIN_PSADBW,
27496 IX86_BUILTIN_PSHUFW,
27498 IX86_BUILTIN_PSLLW,
27499 IX86_BUILTIN_PSLLD,
27500 IX86_BUILTIN_PSLLQ,
27501 IX86_BUILTIN_PSRAW,
27502 IX86_BUILTIN_PSRAD,
27503 IX86_BUILTIN_PSRLW,
27504 IX86_BUILTIN_PSRLD,
27505 IX86_BUILTIN_PSRLQ,
27506 IX86_BUILTIN_PSLLWI,
27507 IX86_BUILTIN_PSLLDI,
27508 IX86_BUILTIN_PSLLQI,
27509 IX86_BUILTIN_PSRAWI,
27510 IX86_BUILTIN_PSRADI,
27511 IX86_BUILTIN_PSRLWI,
27512 IX86_BUILTIN_PSRLDI,
27513 IX86_BUILTIN_PSRLQI,
27515 IX86_BUILTIN_PUNPCKHBW,
27516 IX86_BUILTIN_PUNPCKHWD,
27517 IX86_BUILTIN_PUNPCKHDQ,
27518 IX86_BUILTIN_PUNPCKLBW,
27519 IX86_BUILTIN_PUNPCKLWD,
27520 IX86_BUILTIN_PUNPCKLDQ,
27522 IX86_BUILTIN_SHUFPS,
27524 IX86_BUILTIN_RCPPS,
27525 IX86_BUILTIN_RCPSS,
27526 IX86_BUILTIN_RSQRTPS,
27527 IX86_BUILTIN_RSQRTPS_NR,
27528 IX86_BUILTIN_RSQRTSS,
27529 IX86_BUILTIN_RSQRTF,
27530 IX86_BUILTIN_SQRTPS,
27531 IX86_BUILTIN_SQRTPS_NR,
27532 IX86_BUILTIN_SQRTSS,
27534 IX86_BUILTIN_UNPCKHPS,
27535 IX86_BUILTIN_UNPCKLPS,
27537 IX86_BUILTIN_ANDPS,
27538 IX86_BUILTIN_ANDNPS,
27539 IX86_BUILTIN_ORPS,
27540 IX86_BUILTIN_XORPS,
27542 IX86_BUILTIN_EMMS,
27543 IX86_BUILTIN_LDMXCSR,
27544 IX86_BUILTIN_STMXCSR,
27545 IX86_BUILTIN_SFENCE,
27547 IX86_BUILTIN_FXSAVE,
27548 IX86_BUILTIN_FXRSTOR,
27549 IX86_BUILTIN_FXSAVE64,
27550 IX86_BUILTIN_FXRSTOR64,
27552 IX86_BUILTIN_XSAVE,
27553 IX86_BUILTIN_XRSTOR,
27554 IX86_BUILTIN_XSAVE64,
27555 IX86_BUILTIN_XRSTOR64,
27557 IX86_BUILTIN_XSAVEOPT,
27558 IX86_BUILTIN_XSAVEOPT64,
27560 /* 3DNow! Original */
27561 IX86_BUILTIN_FEMMS,
27562 IX86_BUILTIN_PAVGUSB,
27563 IX86_BUILTIN_PF2ID,
27564 IX86_BUILTIN_PFACC,
27565 IX86_BUILTIN_PFADD,
27566 IX86_BUILTIN_PFCMPEQ,
27567 IX86_BUILTIN_PFCMPGE,
27568 IX86_BUILTIN_PFCMPGT,
27569 IX86_BUILTIN_PFMAX,
27570 IX86_BUILTIN_PFMIN,
27571 IX86_BUILTIN_PFMUL,
27572 IX86_BUILTIN_PFRCP,
27573 IX86_BUILTIN_PFRCPIT1,
27574 IX86_BUILTIN_PFRCPIT2,
27575 IX86_BUILTIN_PFRSQIT1,
27576 IX86_BUILTIN_PFRSQRT,
27577 IX86_BUILTIN_PFSUB,
27578 IX86_BUILTIN_PFSUBR,
27579 IX86_BUILTIN_PI2FD,
27580 IX86_BUILTIN_PMULHRW,
27582 /* 3DNow! Athlon Extensions */
27583 IX86_BUILTIN_PF2IW,
27584 IX86_BUILTIN_PFNACC,
27585 IX86_BUILTIN_PFPNACC,
27586 IX86_BUILTIN_PI2FW,
27587 IX86_BUILTIN_PSWAPDSI,
27588 IX86_BUILTIN_PSWAPDSF,
27590 /* SSE2 */
27591 IX86_BUILTIN_ADDPD,
27592 IX86_BUILTIN_ADDSD,
27593 IX86_BUILTIN_DIVPD,
27594 IX86_BUILTIN_DIVSD,
27595 IX86_BUILTIN_MULPD,
27596 IX86_BUILTIN_MULSD,
27597 IX86_BUILTIN_SUBPD,
27598 IX86_BUILTIN_SUBSD,
27600 IX86_BUILTIN_CMPEQPD,
27601 IX86_BUILTIN_CMPLTPD,
27602 IX86_BUILTIN_CMPLEPD,
27603 IX86_BUILTIN_CMPGTPD,
27604 IX86_BUILTIN_CMPGEPD,
27605 IX86_BUILTIN_CMPNEQPD,
27606 IX86_BUILTIN_CMPNLTPD,
27607 IX86_BUILTIN_CMPNLEPD,
27608 IX86_BUILTIN_CMPNGTPD,
27609 IX86_BUILTIN_CMPNGEPD,
27610 IX86_BUILTIN_CMPORDPD,
27611 IX86_BUILTIN_CMPUNORDPD,
27612 IX86_BUILTIN_CMPEQSD,
27613 IX86_BUILTIN_CMPLTSD,
27614 IX86_BUILTIN_CMPLESD,
27615 IX86_BUILTIN_CMPNEQSD,
27616 IX86_BUILTIN_CMPNLTSD,
27617 IX86_BUILTIN_CMPNLESD,
27618 IX86_BUILTIN_CMPORDSD,
27619 IX86_BUILTIN_CMPUNORDSD,
27621 IX86_BUILTIN_COMIEQSD,
27622 IX86_BUILTIN_COMILTSD,
27623 IX86_BUILTIN_COMILESD,
27624 IX86_BUILTIN_COMIGTSD,
27625 IX86_BUILTIN_COMIGESD,
27626 IX86_BUILTIN_COMINEQSD,
27627 IX86_BUILTIN_UCOMIEQSD,
27628 IX86_BUILTIN_UCOMILTSD,
27629 IX86_BUILTIN_UCOMILESD,
27630 IX86_BUILTIN_UCOMIGTSD,
27631 IX86_BUILTIN_UCOMIGESD,
27632 IX86_BUILTIN_UCOMINEQSD,
27634 IX86_BUILTIN_MAXPD,
27635 IX86_BUILTIN_MAXSD,
27636 IX86_BUILTIN_MINPD,
27637 IX86_BUILTIN_MINSD,
27639 IX86_BUILTIN_ANDPD,
27640 IX86_BUILTIN_ANDNPD,
27641 IX86_BUILTIN_ORPD,
27642 IX86_BUILTIN_XORPD,
27644 IX86_BUILTIN_SQRTPD,
27645 IX86_BUILTIN_SQRTSD,
27647 IX86_BUILTIN_UNPCKHPD,
27648 IX86_BUILTIN_UNPCKLPD,
27650 IX86_BUILTIN_SHUFPD,
27652 IX86_BUILTIN_LOADUPD,
27653 IX86_BUILTIN_STOREUPD,
27654 IX86_BUILTIN_MOVSD,
27656 IX86_BUILTIN_LOADHPD,
27657 IX86_BUILTIN_LOADLPD,
27659 IX86_BUILTIN_CVTDQ2PD,
27660 IX86_BUILTIN_CVTDQ2PS,
27662 IX86_BUILTIN_CVTPD2DQ,
27663 IX86_BUILTIN_CVTPD2PI,
27664 IX86_BUILTIN_CVTPD2PS,
27665 IX86_BUILTIN_CVTTPD2DQ,
27666 IX86_BUILTIN_CVTTPD2PI,
27668 IX86_BUILTIN_CVTPI2PD,
27669 IX86_BUILTIN_CVTSI2SD,
27670 IX86_BUILTIN_CVTSI642SD,
27672 IX86_BUILTIN_CVTSD2SI,
27673 IX86_BUILTIN_CVTSD2SI64,
27674 IX86_BUILTIN_CVTSD2SS,
27675 IX86_BUILTIN_CVTSS2SD,
27676 IX86_BUILTIN_CVTTSD2SI,
27677 IX86_BUILTIN_CVTTSD2SI64,
27679 IX86_BUILTIN_CVTPS2DQ,
27680 IX86_BUILTIN_CVTPS2PD,
27681 IX86_BUILTIN_CVTTPS2DQ,
27683 IX86_BUILTIN_MOVNTI,
27684 IX86_BUILTIN_MOVNTI64,
27685 IX86_BUILTIN_MOVNTPD,
27686 IX86_BUILTIN_MOVNTDQ,
27688 IX86_BUILTIN_MOVQ128,
27690 /* SSE2 MMX */
27691 IX86_BUILTIN_MASKMOVDQU,
27692 IX86_BUILTIN_MOVMSKPD,
27693 IX86_BUILTIN_PMOVMSKB128,
27695 IX86_BUILTIN_PACKSSWB128,
27696 IX86_BUILTIN_PACKSSDW128,
27697 IX86_BUILTIN_PACKUSWB128,
27699 IX86_BUILTIN_PADDB128,
27700 IX86_BUILTIN_PADDW128,
27701 IX86_BUILTIN_PADDD128,
27702 IX86_BUILTIN_PADDQ128,
27703 IX86_BUILTIN_PADDSB128,
27704 IX86_BUILTIN_PADDSW128,
27705 IX86_BUILTIN_PADDUSB128,
27706 IX86_BUILTIN_PADDUSW128,
27707 IX86_BUILTIN_PSUBB128,
27708 IX86_BUILTIN_PSUBW128,
27709 IX86_BUILTIN_PSUBD128,
27710 IX86_BUILTIN_PSUBQ128,
27711 IX86_BUILTIN_PSUBSB128,
27712 IX86_BUILTIN_PSUBSW128,
27713 IX86_BUILTIN_PSUBUSB128,
27714 IX86_BUILTIN_PSUBUSW128,
27716 IX86_BUILTIN_PAND128,
27717 IX86_BUILTIN_PANDN128,
27718 IX86_BUILTIN_POR128,
27719 IX86_BUILTIN_PXOR128,
27721 IX86_BUILTIN_PAVGB128,
27722 IX86_BUILTIN_PAVGW128,
27724 IX86_BUILTIN_PCMPEQB128,
27725 IX86_BUILTIN_PCMPEQW128,
27726 IX86_BUILTIN_PCMPEQD128,
27727 IX86_BUILTIN_PCMPGTB128,
27728 IX86_BUILTIN_PCMPGTW128,
27729 IX86_BUILTIN_PCMPGTD128,
27731 IX86_BUILTIN_PMADDWD128,
27733 IX86_BUILTIN_PMAXSW128,
27734 IX86_BUILTIN_PMAXUB128,
27735 IX86_BUILTIN_PMINSW128,
27736 IX86_BUILTIN_PMINUB128,
27738 IX86_BUILTIN_PMULUDQ,
27739 IX86_BUILTIN_PMULUDQ128,
27740 IX86_BUILTIN_PMULHUW128,
27741 IX86_BUILTIN_PMULHW128,
27742 IX86_BUILTIN_PMULLW128,
27744 IX86_BUILTIN_PSADBW128,
27745 IX86_BUILTIN_PSHUFHW,
27746 IX86_BUILTIN_PSHUFLW,
27747 IX86_BUILTIN_PSHUFD,
27749 IX86_BUILTIN_PSLLDQI128,
27750 IX86_BUILTIN_PSLLWI128,
27751 IX86_BUILTIN_PSLLDI128,
27752 IX86_BUILTIN_PSLLQI128,
27753 IX86_BUILTIN_PSRAWI128,
27754 IX86_BUILTIN_PSRADI128,
27755 IX86_BUILTIN_PSRLDQI128,
27756 IX86_BUILTIN_PSRLWI128,
27757 IX86_BUILTIN_PSRLDI128,
27758 IX86_BUILTIN_PSRLQI128,
27760 IX86_BUILTIN_PSLLDQ128,
27761 IX86_BUILTIN_PSLLW128,
27762 IX86_BUILTIN_PSLLD128,
27763 IX86_BUILTIN_PSLLQ128,
27764 IX86_BUILTIN_PSRAW128,
27765 IX86_BUILTIN_PSRAD128,
27766 IX86_BUILTIN_PSRLW128,
27767 IX86_BUILTIN_PSRLD128,
27768 IX86_BUILTIN_PSRLQ128,
27770 IX86_BUILTIN_PUNPCKHBW128,
27771 IX86_BUILTIN_PUNPCKHWD128,
27772 IX86_BUILTIN_PUNPCKHDQ128,
27773 IX86_BUILTIN_PUNPCKHQDQ128,
27774 IX86_BUILTIN_PUNPCKLBW128,
27775 IX86_BUILTIN_PUNPCKLWD128,
27776 IX86_BUILTIN_PUNPCKLDQ128,
27777 IX86_BUILTIN_PUNPCKLQDQ128,
27779 IX86_BUILTIN_CLFLUSH,
27780 IX86_BUILTIN_MFENCE,
27781 IX86_BUILTIN_LFENCE,
27782 IX86_BUILTIN_PAUSE,
27784 IX86_BUILTIN_FNSTENV,
27785 IX86_BUILTIN_FLDENV,
27786 IX86_BUILTIN_FNSTSW,
27787 IX86_BUILTIN_FNCLEX,
27789 IX86_BUILTIN_BSRSI,
27790 IX86_BUILTIN_BSRDI,
27791 IX86_BUILTIN_RDPMC,
27792 IX86_BUILTIN_RDTSC,
27793 IX86_BUILTIN_RDTSCP,
27794 IX86_BUILTIN_ROLQI,
27795 IX86_BUILTIN_ROLHI,
27796 IX86_BUILTIN_RORQI,
27797 IX86_BUILTIN_RORHI,
27799 /* SSE3. */
27800 IX86_BUILTIN_ADDSUBPS,
27801 IX86_BUILTIN_HADDPS,
27802 IX86_BUILTIN_HSUBPS,
27803 IX86_BUILTIN_MOVSHDUP,
27804 IX86_BUILTIN_MOVSLDUP,
27805 IX86_BUILTIN_ADDSUBPD,
27806 IX86_BUILTIN_HADDPD,
27807 IX86_BUILTIN_HSUBPD,
27808 IX86_BUILTIN_LDDQU,
27810 IX86_BUILTIN_MONITOR,
27811 IX86_BUILTIN_MWAIT,
27813 /* SSSE3. */
27814 IX86_BUILTIN_PHADDW,
27815 IX86_BUILTIN_PHADDD,
27816 IX86_BUILTIN_PHADDSW,
27817 IX86_BUILTIN_PHSUBW,
27818 IX86_BUILTIN_PHSUBD,
27819 IX86_BUILTIN_PHSUBSW,
27820 IX86_BUILTIN_PMADDUBSW,
27821 IX86_BUILTIN_PMULHRSW,
27822 IX86_BUILTIN_PSHUFB,
27823 IX86_BUILTIN_PSIGNB,
27824 IX86_BUILTIN_PSIGNW,
27825 IX86_BUILTIN_PSIGND,
27826 IX86_BUILTIN_PALIGNR,
27827 IX86_BUILTIN_PABSB,
27828 IX86_BUILTIN_PABSW,
27829 IX86_BUILTIN_PABSD,
27831 IX86_BUILTIN_PHADDW128,
27832 IX86_BUILTIN_PHADDD128,
27833 IX86_BUILTIN_PHADDSW128,
27834 IX86_BUILTIN_PHSUBW128,
27835 IX86_BUILTIN_PHSUBD128,
27836 IX86_BUILTIN_PHSUBSW128,
27837 IX86_BUILTIN_PMADDUBSW128,
27838 IX86_BUILTIN_PMULHRSW128,
27839 IX86_BUILTIN_PSHUFB128,
27840 IX86_BUILTIN_PSIGNB128,
27841 IX86_BUILTIN_PSIGNW128,
27842 IX86_BUILTIN_PSIGND128,
27843 IX86_BUILTIN_PALIGNR128,
27844 IX86_BUILTIN_PABSB128,
27845 IX86_BUILTIN_PABSW128,
27846 IX86_BUILTIN_PABSD128,
27848 /* AMDFAM10 - SSE4A New Instructions. */
27849 IX86_BUILTIN_MOVNTSD,
27850 IX86_BUILTIN_MOVNTSS,
27851 IX86_BUILTIN_EXTRQI,
27852 IX86_BUILTIN_EXTRQ,
27853 IX86_BUILTIN_INSERTQI,
27854 IX86_BUILTIN_INSERTQ,
27856 /* SSE4.1. */
27857 IX86_BUILTIN_BLENDPD,
27858 IX86_BUILTIN_BLENDPS,
27859 IX86_BUILTIN_BLENDVPD,
27860 IX86_BUILTIN_BLENDVPS,
27861 IX86_BUILTIN_PBLENDVB128,
27862 IX86_BUILTIN_PBLENDW128,
27864 IX86_BUILTIN_DPPD,
27865 IX86_BUILTIN_DPPS,
27867 IX86_BUILTIN_INSERTPS128,
27869 IX86_BUILTIN_MOVNTDQA,
27870 IX86_BUILTIN_MPSADBW128,
27871 IX86_BUILTIN_PACKUSDW128,
27872 IX86_BUILTIN_PCMPEQQ,
27873 IX86_BUILTIN_PHMINPOSUW128,
27875 IX86_BUILTIN_PMAXSB128,
27876 IX86_BUILTIN_PMAXSD128,
27877 IX86_BUILTIN_PMAXUD128,
27878 IX86_BUILTIN_PMAXUW128,
27880 IX86_BUILTIN_PMINSB128,
27881 IX86_BUILTIN_PMINSD128,
27882 IX86_BUILTIN_PMINUD128,
27883 IX86_BUILTIN_PMINUW128,
27885 IX86_BUILTIN_PMOVSXBW128,
27886 IX86_BUILTIN_PMOVSXBD128,
27887 IX86_BUILTIN_PMOVSXBQ128,
27888 IX86_BUILTIN_PMOVSXWD128,
27889 IX86_BUILTIN_PMOVSXWQ128,
27890 IX86_BUILTIN_PMOVSXDQ128,
27892 IX86_BUILTIN_PMOVZXBW128,
27893 IX86_BUILTIN_PMOVZXBD128,
27894 IX86_BUILTIN_PMOVZXBQ128,
27895 IX86_BUILTIN_PMOVZXWD128,
27896 IX86_BUILTIN_PMOVZXWQ128,
27897 IX86_BUILTIN_PMOVZXDQ128,
27899 IX86_BUILTIN_PMULDQ128,
27900 IX86_BUILTIN_PMULLD128,
27902 IX86_BUILTIN_ROUNDSD,
27903 IX86_BUILTIN_ROUNDSS,
27905 IX86_BUILTIN_ROUNDPD,
27906 IX86_BUILTIN_ROUNDPS,
27908 IX86_BUILTIN_FLOORPD,
27909 IX86_BUILTIN_CEILPD,
27910 IX86_BUILTIN_TRUNCPD,
27911 IX86_BUILTIN_RINTPD,
27912 IX86_BUILTIN_ROUNDPD_AZ,
27914 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
27915 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
27916 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,
27918 IX86_BUILTIN_FLOORPS,
27919 IX86_BUILTIN_CEILPS,
27920 IX86_BUILTIN_TRUNCPS,
27921 IX86_BUILTIN_RINTPS,
27922 IX86_BUILTIN_ROUNDPS_AZ,
27924 IX86_BUILTIN_FLOORPS_SFIX,
27925 IX86_BUILTIN_CEILPS_SFIX,
27926 IX86_BUILTIN_ROUNDPS_AZ_SFIX,
27928 IX86_BUILTIN_PTESTZ,
27929 IX86_BUILTIN_PTESTC,
27930 IX86_BUILTIN_PTESTNZC,
27932 IX86_BUILTIN_VEC_INIT_V2SI,
27933 IX86_BUILTIN_VEC_INIT_V4HI,
27934 IX86_BUILTIN_VEC_INIT_V8QI,
27935 IX86_BUILTIN_VEC_EXT_V2DF,
27936 IX86_BUILTIN_VEC_EXT_V2DI,
27937 IX86_BUILTIN_VEC_EXT_V4SF,
27938 IX86_BUILTIN_VEC_EXT_V4SI,
27939 IX86_BUILTIN_VEC_EXT_V8HI,
27940 IX86_BUILTIN_VEC_EXT_V2SI,
27941 IX86_BUILTIN_VEC_EXT_V4HI,
27942 IX86_BUILTIN_VEC_EXT_V16QI,
27943 IX86_BUILTIN_VEC_SET_V2DI,
27944 IX86_BUILTIN_VEC_SET_V4SF,
27945 IX86_BUILTIN_VEC_SET_V4SI,
27946 IX86_BUILTIN_VEC_SET_V8HI,
27947 IX86_BUILTIN_VEC_SET_V4HI,
27948 IX86_BUILTIN_VEC_SET_V16QI,
27950 IX86_BUILTIN_VEC_PACK_SFIX,
27951 IX86_BUILTIN_VEC_PACK_SFIX256,
27953 /* SSE4.2. */
27954 IX86_BUILTIN_CRC32QI,
27955 IX86_BUILTIN_CRC32HI,
27956 IX86_BUILTIN_CRC32SI,
27957 IX86_BUILTIN_CRC32DI,
27959 IX86_BUILTIN_PCMPESTRI128,
27960 IX86_BUILTIN_PCMPESTRM128,
27961 IX86_BUILTIN_PCMPESTRA128,
27962 IX86_BUILTIN_PCMPESTRC128,
27963 IX86_BUILTIN_PCMPESTRO128,
27964 IX86_BUILTIN_PCMPESTRS128,
27965 IX86_BUILTIN_PCMPESTRZ128,
27966 IX86_BUILTIN_PCMPISTRI128,
27967 IX86_BUILTIN_PCMPISTRM128,
27968 IX86_BUILTIN_PCMPISTRA128,
27969 IX86_BUILTIN_PCMPISTRC128,
27970 IX86_BUILTIN_PCMPISTRO128,
27971 IX86_BUILTIN_PCMPISTRS128,
27972 IX86_BUILTIN_PCMPISTRZ128,
27974 IX86_BUILTIN_PCMPGTQ,
27976 /* AES instructions */
27977 IX86_BUILTIN_AESENC128,
27978 IX86_BUILTIN_AESENCLAST128,
27979 IX86_BUILTIN_AESDEC128,
27980 IX86_BUILTIN_AESDECLAST128,
27981 IX86_BUILTIN_AESIMC128,
27982 IX86_BUILTIN_AESKEYGENASSIST128,
27984 /* PCLMUL instruction */
27985 IX86_BUILTIN_PCLMULQDQ128,
27987 /* AVX */
27988 IX86_BUILTIN_ADDPD256,
27989 IX86_BUILTIN_ADDPS256,
27990 IX86_BUILTIN_ADDSUBPD256,
27991 IX86_BUILTIN_ADDSUBPS256,
27992 IX86_BUILTIN_ANDPD256,
27993 IX86_BUILTIN_ANDPS256,
27994 IX86_BUILTIN_ANDNPD256,
27995 IX86_BUILTIN_ANDNPS256,
27996 IX86_BUILTIN_BLENDPD256,
27997 IX86_BUILTIN_BLENDPS256,
27998 IX86_BUILTIN_BLENDVPD256,
27999 IX86_BUILTIN_BLENDVPS256,
28000 IX86_BUILTIN_DIVPD256,
28001 IX86_BUILTIN_DIVPS256,
28002 IX86_BUILTIN_DPPS256,
28003 IX86_BUILTIN_HADDPD256,
28004 IX86_BUILTIN_HADDPS256,
28005 IX86_BUILTIN_HSUBPD256,
28006 IX86_BUILTIN_HSUBPS256,
28007 IX86_BUILTIN_MAXPD256,
28008 IX86_BUILTIN_MAXPS256,
28009 IX86_BUILTIN_MINPD256,
28010 IX86_BUILTIN_MINPS256,
28011 IX86_BUILTIN_MULPD256,
28012 IX86_BUILTIN_MULPS256,
28013 IX86_BUILTIN_ORPD256,
28014 IX86_BUILTIN_ORPS256,
28015 IX86_BUILTIN_SHUFPD256,
28016 IX86_BUILTIN_SHUFPS256,
28017 IX86_BUILTIN_SUBPD256,
28018 IX86_BUILTIN_SUBPS256,
28019 IX86_BUILTIN_XORPD256,
28020 IX86_BUILTIN_XORPS256,
28021 IX86_BUILTIN_CMPSD,
28022 IX86_BUILTIN_CMPSS,
28023 IX86_BUILTIN_CMPPD,
28024 IX86_BUILTIN_CMPPS,
28025 IX86_BUILTIN_CMPPD256,
28026 IX86_BUILTIN_CMPPS256,
28027 IX86_BUILTIN_CVTDQ2PD256,
28028 IX86_BUILTIN_CVTDQ2PS256,
28029 IX86_BUILTIN_CVTPD2PS256,
28030 IX86_BUILTIN_CVTPS2DQ256,
28031 IX86_BUILTIN_CVTPS2PD256,
28032 IX86_BUILTIN_CVTTPD2DQ256,
28033 IX86_BUILTIN_CVTPD2DQ256,
28034 IX86_BUILTIN_CVTTPS2DQ256,
28035 IX86_BUILTIN_EXTRACTF128PD256,
28036 IX86_BUILTIN_EXTRACTF128PS256,
28037 IX86_BUILTIN_EXTRACTF128SI256,
28038 IX86_BUILTIN_VZEROALL,
28039 IX86_BUILTIN_VZEROUPPER,
28040 IX86_BUILTIN_VPERMILVARPD,
28041 IX86_BUILTIN_VPERMILVARPS,
28042 IX86_BUILTIN_VPERMILVARPD256,
28043 IX86_BUILTIN_VPERMILVARPS256,
28044 IX86_BUILTIN_VPERMILPD,
28045 IX86_BUILTIN_VPERMILPS,
28046 IX86_BUILTIN_VPERMILPD256,
28047 IX86_BUILTIN_VPERMILPS256,
28048 IX86_BUILTIN_VPERMIL2PD,
28049 IX86_BUILTIN_VPERMIL2PS,
28050 IX86_BUILTIN_VPERMIL2PD256,
28051 IX86_BUILTIN_VPERMIL2PS256,
28052 IX86_BUILTIN_VPERM2F128PD256,
28053 IX86_BUILTIN_VPERM2F128PS256,
28054 IX86_BUILTIN_VPERM2F128SI256,
28055 IX86_BUILTIN_VBROADCASTSS,
28056 IX86_BUILTIN_VBROADCASTSD256,
28057 IX86_BUILTIN_VBROADCASTSS256,
28058 IX86_BUILTIN_VBROADCASTPD256,
28059 IX86_BUILTIN_VBROADCASTPS256,
28060 IX86_BUILTIN_VINSERTF128PD256,
28061 IX86_BUILTIN_VINSERTF128PS256,
28062 IX86_BUILTIN_VINSERTF128SI256,
28063 IX86_BUILTIN_LOADUPD256,
28064 IX86_BUILTIN_LOADUPS256,
28065 IX86_BUILTIN_STOREUPD256,
28066 IX86_BUILTIN_STOREUPS256,
28067 IX86_BUILTIN_LDDQU256,
28068 IX86_BUILTIN_MOVNTDQ256,
28069 IX86_BUILTIN_MOVNTPD256,
28070 IX86_BUILTIN_MOVNTPS256,
28071 IX86_BUILTIN_LOADDQU256,
28072 IX86_BUILTIN_STOREDQU256,
28073 IX86_BUILTIN_MASKLOADPD,
28074 IX86_BUILTIN_MASKLOADPS,
28075 IX86_BUILTIN_MASKSTOREPD,
28076 IX86_BUILTIN_MASKSTOREPS,
28077 IX86_BUILTIN_MASKLOADPD256,
28078 IX86_BUILTIN_MASKLOADPS256,
28079 IX86_BUILTIN_MASKSTOREPD256,
28080 IX86_BUILTIN_MASKSTOREPS256,
28081 IX86_BUILTIN_MOVSHDUP256,
28082 IX86_BUILTIN_MOVSLDUP256,
28083 IX86_BUILTIN_MOVDDUP256,
28085 IX86_BUILTIN_SQRTPD256,
28086 IX86_BUILTIN_SQRTPS256,
28087 IX86_BUILTIN_SQRTPS_NR256,
28088 IX86_BUILTIN_RSQRTPS256,
28089 IX86_BUILTIN_RSQRTPS_NR256,
28091 IX86_BUILTIN_RCPPS256,
28093 IX86_BUILTIN_ROUNDPD256,
28094 IX86_BUILTIN_ROUNDPS256,
28096 IX86_BUILTIN_FLOORPD256,
28097 IX86_BUILTIN_CEILPD256,
28098 IX86_BUILTIN_TRUNCPD256,
28099 IX86_BUILTIN_RINTPD256,
28100 IX86_BUILTIN_ROUNDPD_AZ256,
28102 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
28103 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
28104 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,
28106 IX86_BUILTIN_FLOORPS256,
28107 IX86_BUILTIN_CEILPS256,
28108 IX86_BUILTIN_TRUNCPS256,
28109 IX86_BUILTIN_RINTPS256,
28110 IX86_BUILTIN_ROUNDPS_AZ256,
28112 IX86_BUILTIN_FLOORPS_SFIX256,
28113 IX86_BUILTIN_CEILPS_SFIX256,
28114 IX86_BUILTIN_ROUNDPS_AZ_SFIX256,
28116 IX86_BUILTIN_UNPCKHPD256,
28117 IX86_BUILTIN_UNPCKLPD256,
28118 IX86_BUILTIN_UNPCKHPS256,
28119 IX86_BUILTIN_UNPCKLPS256,
28121 IX86_BUILTIN_SI256_SI,
28122 IX86_BUILTIN_PS256_PS,
28123 IX86_BUILTIN_PD256_PD,
28124 IX86_BUILTIN_SI_SI256,
28125 IX86_BUILTIN_PS_PS256,
28126 IX86_BUILTIN_PD_PD256,
28128 IX86_BUILTIN_VTESTZPD,
28129 IX86_BUILTIN_VTESTCPD,
28130 IX86_BUILTIN_VTESTNZCPD,
28131 IX86_BUILTIN_VTESTZPS,
28132 IX86_BUILTIN_VTESTCPS,
28133 IX86_BUILTIN_VTESTNZCPS,
28134 IX86_BUILTIN_VTESTZPD256,
28135 IX86_BUILTIN_VTESTCPD256,
28136 IX86_BUILTIN_VTESTNZCPD256,
28137 IX86_BUILTIN_VTESTZPS256,
28138 IX86_BUILTIN_VTESTCPS256,
28139 IX86_BUILTIN_VTESTNZCPS256,
28140 IX86_BUILTIN_PTESTZ256,
28141 IX86_BUILTIN_PTESTC256,
28142 IX86_BUILTIN_PTESTNZC256,
28144 IX86_BUILTIN_MOVMSKPD256,
28145 IX86_BUILTIN_MOVMSKPS256,
28147 /* AVX2 */
28148 IX86_BUILTIN_MPSADBW256,
28149 IX86_BUILTIN_PABSB256,
28150 IX86_BUILTIN_PABSW256,
28151 IX86_BUILTIN_PABSD256,
28152 IX86_BUILTIN_PACKSSDW256,
28153 IX86_BUILTIN_PACKSSWB256,
28154 IX86_BUILTIN_PACKUSDW256,
28155 IX86_BUILTIN_PACKUSWB256,
28156 IX86_BUILTIN_PADDB256,
28157 IX86_BUILTIN_PADDW256,
28158 IX86_BUILTIN_PADDD256,
28159 IX86_BUILTIN_PADDQ256,
28160 IX86_BUILTIN_PADDSB256,
28161 IX86_BUILTIN_PADDSW256,
28162 IX86_BUILTIN_PADDUSB256,
28163 IX86_BUILTIN_PADDUSW256,
28164 IX86_BUILTIN_PALIGNR256,
28165 IX86_BUILTIN_AND256I,
28166 IX86_BUILTIN_ANDNOT256I,
28167 IX86_BUILTIN_PAVGB256,
28168 IX86_BUILTIN_PAVGW256,
28169 IX86_BUILTIN_PBLENDVB256,
28170 IX86_BUILTIN_PBLENDVW256,
28171 IX86_BUILTIN_PCMPEQB256,
28172 IX86_BUILTIN_PCMPEQW256,
28173 IX86_BUILTIN_PCMPEQD256,
28174 IX86_BUILTIN_PCMPEQQ256,
28175 IX86_BUILTIN_PCMPGTB256,
28176 IX86_BUILTIN_PCMPGTW256,
28177 IX86_BUILTIN_PCMPGTD256,
28178 IX86_BUILTIN_PCMPGTQ256,
28179 IX86_BUILTIN_PHADDW256,
28180 IX86_BUILTIN_PHADDD256,
28181 IX86_BUILTIN_PHADDSW256,
28182 IX86_BUILTIN_PHSUBW256,
28183 IX86_BUILTIN_PHSUBD256,
28184 IX86_BUILTIN_PHSUBSW256,
28185 IX86_BUILTIN_PMADDUBSW256,
28186 IX86_BUILTIN_PMADDWD256,
28187 IX86_BUILTIN_PMAXSB256,
28188 IX86_BUILTIN_PMAXSW256,
28189 IX86_BUILTIN_PMAXSD256,
28190 IX86_BUILTIN_PMAXUB256,
28191 IX86_BUILTIN_PMAXUW256,
28192 IX86_BUILTIN_PMAXUD256,
28193 IX86_BUILTIN_PMINSB256,
28194 IX86_BUILTIN_PMINSW256,
28195 IX86_BUILTIN_PMINSD256,
28196 IX86_BUILTIN_PMINUB256,
28197 IX86_BUILTIN_PMINUW256,
28198 IX86_BUILTIN_PMINUD256,
28199 IX86_BUILTIN_PMOVMSKB256,
28200 IX86_BUILTIN_PMOVSXBW256,
28201 IX86_BUILTIN_PMOVSXBD256,
28202 IX86_BUILTIN_PMOVSXBQ256,
28203 IX86_BUILTIN_PMOVSXWD256,
28204 IX86_BUILTIN_PMOVSXWQ256,
28205 IX86_BUILTIN_PMOVSXDQ256,
28206 IX86_BUILTIN_PMOVZXBW256,
28207 IX86_BUILTIN_PMOVZXBD256,
28208 IX86_BUILTIN_PMOVZXBQ256,
28209 IX86_BUILTIN_PMOVZXWD256,
28210 IX86_BUILTIN_PMOVZXWQ256,
28211 IX86_BUILTIN_PMOVZXDQ256,
28212 IX86_BUILTIN_PMULDQ256,
28213 IX86_BUILTIN_PMULHRSW256,
28214 IX86_BUILTIN_PMULHUW256,
28215 IX86_BUILTIN_PMULHW256,
28216 IX86_BUILTIN_PMULLW256,
28217 IX86_BUILTIN_PMULLD256,
28218 IX86_BUILTIN_PMULUDQ256,
28219 IX86_BUILTIN_POR256,
28220 IX86_BUILTIN_PSADBW256,
28221 IX86_BUILTIN_PSHUFB256,
28222 IX86_BUILTIN_PSHUFD256,
28223 IX86_BUILTIN_PSHUFHW256,
28224 IX86_BUILTIN_PSHUFLW256,
28225 IX86_BUILTIN_PSIGNB256,
28226 IX86_BUILTIN_PSIGNW256,
28227 IX86_BUILTIN_PSIGND256,
28228 IX86_BUILTIN_PSLLDQI256,
28229 IX86_BUILTIN_PSLLWI256,
28230 IX86_BUILTIN_PSLLW256,
28231 IX86_BUILTIN_PSLLDI256,
28232 IX86_BUILTIN_PSLLD256,
28233 IX86_BUILTIN_PSLLQI256,
28234 IX86_BUILTIN_PSLLQ256,
28235 IX86_BUILTIN_PSRAWI256,
28236 IX86_BUILTIN_PSRAW256,
28237 IX86_BUILTIN_PSRADI256,
28238 IX86_BUILTIN_PSRAD256,
28239 IX86_BUILTIN_PSRLDQI256,
28240 IX86_BUILTIN_PSRLWI256,
28241 IX86_BUILTIN_PSRLW256,
28242 IX86_BUILTIN_PSRLDI256,
28243 IX86_BUILTIN_PSRLD256,
28244 IX86_BUILTIN_PSRLQI256,
28245 IX86_BUILTIN_PSRLQ256,
28246 IX86_BUILTIN_PSUBB256,
28247 IX86_BUILTIN_PSUBW256,
28248 IX86_BUILTIN_PSUBD256,
28249 IX86_BUILTIN_PSUBQ256,
28250 IX86_BUILTIN_PSUBSB256,
28251 IX86_BUILTIN_PSUBSW256,
28252 IX86_BUILTIN_PSUBUSB256,
28253 IX86_BUILTIN_PSUBUSW256,
28254 IX86_BUILTIN_PUNPCKHBW256,
28255 IX86_BUILTIN_PUNPCKHWD256,
28256 IX86_BUILTIN_PUNPCKHDQ256,
28257 IX86_BUILTIN_PUNPCKHQDQ256,
28258 IX86_BUILTIN_PUNPCKLBW256,
28259 IX86_BUILTIN_PUNPCKLWD256,
28260 IX86_BUILTIN_PUNPCKLDQ256,
28261 IX86_BUILTIN_PUNPCKLQDQ256,
28262 IX86_BUILTIN_PXOR256,
28263 IX86_BUILTIN_MOVNTDQA256,
28264 IX86_BUILTIN_VBROADCASTSS_PS,
28265 IX86_BUILTIN_VBROADCASTSS_PS256,
28266 IX86_BUILTIN_VBROADCASTSD_PD256,
28267 IX86_BUILTIN_VBROADCASTSI256,
28268 IX86_BUILTIN_PBLENDD256,
28269 IX86_BUILTIN_PBLENDD128,
28270 IX86_BUILTIN_PBROADCASTB256,
28271 IX86_BUILTIN_PBROADCASTW256,
28272 IX86_BUILTIN_PBROADCASTD256,
28273 IX86_BUILTIN_PBROADCASTQ256,
28274 IX86_BUILTIN_PBROADCASTB128,
28275 IX86_BUILTIN_PBROADCASTW128,
28276 IX86_BUILTIN_PBROADCASTD128,
28277 IX86_BUILTIN_PBROADCASTQ128,
28278 IX86_BUILTIN_VPERMVARSI256,
28279 IX86_BUILTIN_VPERMDF256,
28280 IX86_BUILTIN_VPERMVARSF256,
28281 IX86_BUILTIN_VPERMDI256,
28282 IX86_BUILTIN_VPERMTI256,
28283 IX86_BUILTIN_VEXTRACT128I256,
28284 IX86_BUILTIN_VINSERT128I256,
28285 IX86_BUILTIN_MASKLOADD,
28286 IX86_BUILTIN_MASKLOADQ,
28287 IX86_BUILTIN_MASKLOADD256,
28288 IX86_BUILTIN_MASKLOADQ256,
28289 IX86_BUILTIN_MASKSTORED,
28290 IX86_BUILTIN_MASKSTOREQ,
28291 IX86_BUILTIN_MASKSTORED256,
28292 IX86_BUILTIN_MASKSTOREQ256,
28293 IX86_BUILTIN_PSLLVV4DI,
28294 IX86_BUILTIN_PSLLVV2DI,
28295 IX86_BUILTIN_PSLLVV8SI,
28296 IX86_BUILTIN_PSLLVV4SI,
28297 IX86_BUILTIN_PSRAVV8SI,
28298 IX86_BUILTIN_PSRAVV4SI,
28299 IX86_BUILTIN_PSRLVV4DI,
28300 IX86_BUILTIN_PSRLVV2DI,
28301 IX86_BUILTIN_PSRLVV8SI,
28302 IX86_BUILTIN_PSRLVV4SI,
28304 IX86_BUILTIN_GATHERSIV2DF,
28305 IX86_BUILTIN_GATHERSIV4DF,
28306 IX86_BUILTIN_GATHERDIV2DF,
28307 IX86_BUILTIN_GATHERDIV4DF,
28308 IX86_BUILTIN_GATHERSIV4SF,
28309 IX86_BUILTIN_GATHERSIV8SF,
28310 IX86_BUILTIN_GATHERDIV4SF,
28311 IX86_BUILTIN_GATHERDIV8SF,
28312 IX86_BUILTIN_GATHERSIV2DI,
28313 IX86_BUILTIN_GATHERSIV4DI,
28314 IX86_BUILTIN_GATHERDIV2DI,
28315 IX86_BUILTIN_GATHERDIV4DI,
28316 IX86_BUILTIN_GATHERSIV4SI,
28317 IX86_BUILTIN_GATHERSIV8SI,
28318 IX86_BUILTIN_GATHERDIV4SI,
28319 IX86_BUILTIN_GATHERDIV8SI,
28321 /* AVX512F */
28322 IX86_BUILTIN_ADDPD512,
28323 IX86_BUILTIN_ADDPS512,
28324 IX86_BUILTIN_ADDSD_ROUND,
28325 IX86_BUILTIN_ADDSS_ROUND,
28326 IX86_BUILTIN_ALIGND512,
28327 IX86_BUILTIN_ALIGNQ512,
28328 IX86_BUILTIN_BLENDMD512,
28329 IX86_BUILTIN_BLENDMPD512,
28330 IX86_BUILTIN_BLENDMPS512,
28331 IX86_BUILTIN_BLENDMQ512,
28332 IX86_BUILTIN_BROADCASTF32X4_512,
28333 IX86_BUILTIN_BROADCASTF64X4_512,
28334 IX86_BUILTIN_BROADCASTI32X4_512,
28335 IX86_BUILTIN_BROADCASTI64X4_512,
28336 IX86_BUILTIN_BROADCASTSD512,
28337 IX86_BUILTIN_BROADCASTSS512,
28338 IX86_BUILTIN_CMPD512,
28339 IX86_BUILTIN_CMPPD512,
28340 IX86_BUILTIN_CMPPS512,
28341 IX86_BUILTIN_CMPQ512,
28342 IX86_BUILTIN_CMPSD_MASK,
28343 IX86_BUILTIN_CMPSS_MASK,
28344 IX86_BUILTIN_COMIDF,
28345 IX86_BUILTIN_COMISF,
28346 IX86_BUILTIN_COMPRESSPD512,
28347 IX86_BUILTIN_COMPRESSPDSTORE512,
28348 IX86_BUILTIN_COMPRESSPS512,
28349 IX86_BUILTIN_COMPRESSPSSTORE512,
28350 IX86_BUILTIN_CVTDQ2PD512,
28351 IX86_BUILTIN_CVTDQ2PS512,
28352 IX86_BUILTIN_CVTPD2DQ512,
28353 IX86_BUILTIN_CVTPD2PS512,
28354 IX86_BUILTIN_CVTPD2UDQ512,
28355 IX86_BUILTIN_CVTPH2PS512,
28356 IX86_BUILTIN_CVTPS2DQ512,
28357 IX86_BUILTIN_CVTPS2PD512,
28358 IX86_BUILTIN_CVTPS2PH512,
28359 IX86_BUILTIN_CVTPS2UDQ512,
28360 IX86_BUILTIN_CVTSD2SS_ROUND,
28361 IX86_BUILTIN_CVTSI2SD64,
28362 IX86_BUILTIN_CVTSI2SS32,
28363 IX86_BUILTIN_CVTSI2SS64,
28364 IX86_BUILTIN_CVTSS2SD_ROUND,
28365 IX86_BUILTIN_CVTTPD2DQ512,
28366 IX86_BUILTIN_CVTTPD2UDQ512,
28367 IX86_BUILTIN_CVTTPS2DQ512,
28368 IX86_BUILTIN_CVTTPS2UDQ512,
28369 IX86_BUILTIN_CVTUDQ2PD512,
28370 IX86_BUILTIN_CVTUDQ2PS512,
28371 IX86_BUILTIN_CVTUSI2SD32,
28372 IX86_BUILTIN_CVTUSI2SD64,
28373 IX86_BUILTIN_CVTUSI2SS32,
28374 IX86_BUILTIN_CVTUSI2SS64,
28375 IX86_BUILTIN_DIVPD512,
28376 IX86_BUILTIN_DIVPS512,
28377 IX86_BUILTIN_DIVSD_ROUND,
28378 IX86_BUILTIN_DIVSS_ROUND,
28379 IX86_BUILTIN_EXPANDPD512,
28380 IX86_BUILTIN_EXPANDPD512Z,
28381 IX86_BUILTIN_EXPANDPDLOAD512,
28382 IX86_BUILTIN_EXPANDPDLOAD512Z,
28383 IX86_BUILTIN_EXPANDPS512,
28384 IX86_BUILTIN_EXPANDPS512Z,
28385 IX86_BUILTIN_EXPANDPSLOAD512,
28386 IX86_BUILTIN_EXPANDPSLOAD512Z,
28387 IX86_BUILTIN_EXTRACTF32X4,
28388 IX86_BUILTIN_EXTRACTF64X4,
28389 IX86_BUILTIN_EXTRACTI32X4,
28390 IX86_BUILTIN_EXTRACTI64X4,
28391 IX86_BUILTIN_FIXUPIMMPD512_MASK,
28392 IX86_BUILTIN_FIXUPIMMPD512_MASKZ,
28393 IX86_BUILTIN_FIXUPIMMPS512_MASK,
28394 IX86_BUILTIN_FIXUPIMMPS512_MASKZ,
28395 IX86_BUILTIN_FIXUPIMMSD128_MASK,
28396 IX86_BUILTIN_FIXUPIMMSD128_MASKZ,
28397 IX86_BUILTIN_FIXUPIMMSS128_MASK,
28398 IX86_BUILTIN_FIXUPIMMSS128_MASKZ,
28399 IX86_BUILTIN_GETEXPPD512,
28400 IX86_BUILTIN_GETEXPPS512,
28401 IX86_BUILTIN_GETEXPSD128,
28402 IX86_BUILTIN_GETEXPSS128,
28403 IX86_BUILTIN_GETMANTPD512,
28404 IX86_BUILTIN_GETMANTPS512,
28405 IX86_BUILTIN_GETMANTSD128,
28406 IX86_BUILTIN_GETMANTSS128,
28407 IX86_BUILTIN_INSERTF32X4,
28408 IX86_BUILTIN_INSERTF64X4,
28409 IX86_BUILTIN_INSERTI32X4,
28410 IX86_BUILTIN_INSERTI64X4,
28411 IX86_BUILTIN_LOADAPD512,
28412 IX86_BUILTIN_LOADAPS512,
28413 IX86_BUILTIN_LOADDQUDI512,
28414 IX86_BUILTIN_LOADDQUSI512,
28415 IX86_BUILTIN_LOADUPD512,
28416 IX86_BUILTIN_LOADUPS512,
28417 IX86_BUILTIN_MAXPD512,
28418 IX86_BUILTIN_MAXPS512,
28419 IX86_BUILTIN_MAXSD_ROUND,
28420 IX86_BUILTIN_MAXSS_ROUND,
28421 IX86_BUILTIN_MINPD512,
28422 IX86_BUILTIN_MINPS512,
28423 IX86_BUILTIN_MINSD_ROUND,
28424 IX86_BUILTIN_MINSS_ROUND,
28425 IX86_BUILTIN_MOVAPD512,
28426 IX86_BUILTIN_MOVAPS512,
28427 IX86_BUILTIN_MOVDDUP512,
28428 IX86_BUILTIN_MOVDQA32LOAD512,
28429 IX86_BUILTIN_MOVDQA32STORE512,
28430 IX86_BUILTIN_MOVDQA32_512,
28431 IX86_BUILTIN_MOVDQA64LOAD512,
28432 IX86_BUILTIN_MOVDQA64STORE512,
28433 IX86_BUILTIN_MOVDQA64_512,
28434 IX86_BUILTIN_MOVNTDQ512,
28435 IX86_BUILTIN_MOVNTDQA512,
28436 IX86_BUILTIN_MOVNTPD512,
28437 IX86_BUILTIN_MOVNTPS512,
28438 IX86_BUILTIN_MOVSHDUP512,
28439 IX86_BUILTIN_MOVSLDUP512,
28440 IX86_BUILTIN_MULPD512,
28441 IX86_BUILTIN_MULPS512,
28442 IX86_BUILTIN_MULSD_ROUND,
28443 IX86_BUILTIN_MULSS_ROUND,
28444 IX86_BUILTIN_PABSD512,
28445 IX86_BUILTIN_PABSQ512,
28446 IX86_BUILTIN_PADDD512,
28447 IX86_BUILTIN_PADDQ512,
28448 IX86_BUILTIN_PANDD512,
28449 IX86_BUILTIN_PANDND512,
28450 IX86_BUILTIN_PANDNQ512,
28451 IX86_BUILTIN_PANDQ512,
28452 IX86_BUILTIN_PBROADCASTD512,
28453 IX86_BUILTIN_PBROADCASTD512_GPR,
28454 IX86_BUILTIN_PBROADCASTMB512,
28455 IX86_BUILTIN_PBROADCASTMW512,
28456 IX86_BUILTIN_PBROADCASTQ512,
28457 IX86_BUILTIN_PBROADCASTQ512_GPR,
28458 IX86_BUILTIN_PBROADCASTQ512_MEM,
28459 IX86_BUILTIN_PCMPEQD512_MASK,
28460 IX86_BUILTIN_PCMPEQQ512_MASK,
28461 IX86_BUILTIN_PCMPGTD512_MASK,
28462 IX86_BUILTIN_PCMPGTQ512_MASK,
28463 IX86_BUILTIN_PCOMPRESSD512,
28464 IX86_BUILTIN_PCOMPRESSDSTORE512,
28465 IX86_BUILTIN_PCOMPRESSQ512,
28466 IX86_BUILTIN_PCOMPRESSQSTORE512,
28467 IX86_BUILTIN_PEXPANDD512,
28468 IX86_BUILTIN_PEXPANDD512Z,
28469 IX86_BUILTIN_PEXPANDDLOAD512,
28470 IX86_BUILTIN_PEXPANDDLOAD512Z,
28471 IX86_BUILTIN_PEXPANDQ512,
28472 IX86_BUILTIN_PEXPANDQ512Z,
28473 IX86_BUILTIN_PEXPANDQLOAD512,
28474 IX86_BUILTIN_PEXPANDQLOAD512Z,
28475 IX86_BUILTIN_PMAXSD512,
28476 IX86_BUILTIN_PMAXSQ512,
28477 IX86_BUILTIN_PMAXUD512,
28478 IX86_BUILTIN_PMAXUQ512,
28479 IX86_BUILTIN_PMINSD512,
28480 IX86_BUILTIN_PMINSQ512,
28481 IX86_BUILTIN_PMINUD512,
28482 IX86_BUILTIN_PMINUQ512,
28483 IX86_BUILTIN_PMOVDB512,
28484 IX86_BUILTIN_PMOVDB512_MEM,
28485 IX86_BUILTIN_PMOVDW512,
28486 IX86_BUILTIN_PMOVDW512_MEM,
28487 IX86_BUILTIN_PMOVQB512,
28488 IX86_BUILTIN_PMOVQB512_MEM,
28489 IX86_BUILTIN_PMOVQD512,
28490 IX86_BUILTIN_PMOVQD512_MEM,
28491 IX86_BUILTIN_PMOVQW512,
28492 IX86_BUILTIN_PMOVQW512_MEM,
28493 IX86_BUILTIN_PMOVSDB512,
28494 IX86_BUILTIN_PMOVSDB512_MEM,
28495 IX86_BUILTIN_PMOVSDW512,
28496 IX86_BUILTIN_PMOVSDW512_MEM,
28497 IX86_BUILTIN_PMOVSQB512,
28498 IX86_BUILTIN_PMOVSQB512_MEM,
28499 IX86_BUILTIN_PMOVSQD512,
28500 IX86_BUILTIN_PMOVSQD512_MEM,
28501 IX86_BUILTIN_PMOVSQW512,
28502 IX86_BUILTIN_PMOVSQW512_MEM,
28503 IX86_BUILTIN_PMOVSXBD512,
28504 IX86_BUILTIN_PMOVSXBQ512,
28505 IX86_BUILTIN_PMOVSXDQ512,
28506 IX86_BUILTIN_PMOVSXWD512,
28507 IX86_BUILTIN_PMOVSXWQ512,
28508 IX86_BUILTIN_PMOVUSDB512,
28509 IX86_BUILTIN_PMOVUSDB512_MEM,
28510 IX86_BUILTIN_PMOVUSDW512,
28511 IX86_BUILTIN_PMOVUSDW512_MEM,
28512 IX86_BUILTIN_PMOVUSQB512,
28513 IX86_BUILTIN_PMOVUSQB512_MEM,
28514 IX86_BUILTIN_PMOVUSQD512,
28515 IX86_BUILTIN_PMOVUSQD512_MEM,
28516 IX86_BUILTIN_PMOVUSQW512,
28517 IX86_BUILTIN_PMOVUSQW512_MEM,
28518 IX86_BUILTIN_PMOVZXBD512,
28519 IX86_BUILTIN_PMOVZXBQ512,
28520 IX86_BUILTIN_PMOVZXDQ512,
28521 IX86_BUILTIN_PMOVZXWD512,
28522 IX86_BUILTIN_PMOVZXWQ512,
28523 IX86_BUILTIN_PMULDQ512,
28524 IX86_BUILTIN_PMULLD512,
28525 IX86_BUILTIN_PMULUDQ512,
28526 IX86_BUILTIN_PORD512,
28527 IX86_BUILTIN_PORQ512,
28528 IX86_BUILTIN_PROLD512,
28529 IX86_BUILTIN_PROLQ512,
28530 IX86_BUILTIN_PROLVD512,
28531 IX86_BUILTIN_PROLVQ512,
28532 IX86_BUILTIN_PRORD512,
28533 IX86_BUILTIN_PRORQ512,
28534 IX86_BUILTIN_PRORVD512,
28535 IX86_BUILTIN_PRORVQ512,
28536 IX86_BUILTIN_PSHUFD512,
28537 IX86_BUILTIN_PSLLD512,
28538 IX86_BUILTIN_PSLLDI512,
28539 IX86_BUILTIN_PSLLQ512,
28540 IX86_BUILTIN_PSLLQI512,
28541 IX86_BUILTIN_PSLLVV16SI,
28542 IX86_BUILTIN_PSLLVV8DI,
28543 IX86_BUILTIN_PSRAD512,
28544 IX86_BUILTIN_PSRADI512,
28545 IX86_BUILTIN_PSRAQ512,
28546 IX86_BUILTIN_PSRAQI512,
28547 IX86_BUILTIN_PSRAVV16SI,
28548 IX86_BUILTIN_PSRAVV8DI,
28549 IX86_BUILTIN_PSRLD512,
28550 IX86_BUILTIN_PSRLDI512,
28551 IX86_BUILTIN_PSRLQ512,
28552 IX86_BUILTIN_PSRLQI512,
28553 IX86_BUILTIN_PSRLVV16SI,
28554 IX86_BUILTIN_PSRLVV8DI,
28555 IX86_BUILTIN_PSUBD512,
28556 IX86_BUILTIN_PSUBQ512,
28557 IX86_BUILTIN_PTESTMD512,
28558 IX86_BUILTIN_PTESTMQ512,
28559 IX86_BUILTIN_PTESTNMD512,
28560 IX86_BUILTIN_PTESTNMQ512,
28561 IX86_BUILTIN_PUNPCKHDQ512,
28562 IX86_BUILTIN_PUNPCKHQDQ512,
28563 IX86_BUILTIN_PUNPCKLDQ512,
28564 IX86_BUILTIN_PUNPCKLQDQ512,
28565 IX86_BUILTIN_PXORD512,
28566 IX86_BUILTIN_PXORQ512,
28567 IX86_BUILTIN_RCP14PD512,
28568 IX86_BUILTIN_RCP14PS512,
28569 IX86_BUILTIN_RCP14SD,
28570 IX86_BUILTIN_RCP14SS,
28571 IX86_BUILTIN_RNDSCALEPD,
28572 IX86_BUILTIN_RNDSCALEPS,
28573 IX86_BUILTIN_RNDSCALESD,
28574 IX86_BUILTIN_RNDSCALESS,
28575 IX86_BUILTIN_RSQRT14PD512,
28576 IX86_BUILTIN_RSQRT14PS512,
28577 IX86_BUILTIN_RSQRT14SD,
28578 IX86_BUILTIN_RSQRT14SS,
28579 IX86_BUILTIN_SCALEFPD512,
28580 IX86_BUILTIN_SCALEFPS512,
28581 IX86_BUILTIN_SCALEFSD,
28582 IX86_BUILTIN_SCALEFSS,
28583 IX86_BUILTIN_SHUFPD512,
28584 IX86_BUILTIN_SHUFPS512,
28585 IX86_BUILTIN_SHUF_F32x4,
28586 IX86_BUILTIN_SHUF_F64x2,
28587 IX86_BUILTIN_SHUF_I32x4,
28588 IX86_BUILTIN_SHUF_I64x2,
28589 IX86_BUILTIN_SQRTPD512,
28590 IX86_BUILTIN_SQRTPD512_MASK,
28591 IX86_BUILTIN_SQRTPS512_MASK,
28592 IX86_BUILTIN_SQRTPS_NR512,
28593 IX86_BUILTIN_SQRTSD_ROUND,
28594 IX86_BUILTIN_SQRTSS_ROUND,
28595 IX86_BUILTIN_STOREAPD512,
28596 IX86_BUILTIN_STOREAPS512,
28597 IX86_BUILTIN_STOREDQUDI512,
28598 IX86_BUILTIN_STOREDQUSI512,
28599 IX86_BUILTIN_STOREUPD512,
28600 IX86_BUILTIN_STOREUPS512,
28601 IX86_BUILTIN_SUBPD512,
28602 IX86_BUILTIN_SUBPS512,
28603 IX86_BUILTIN_SUBSD_ROUND,
28604 IX86_BUILTIN_SUBSS_ROUND,
28605 IX86_BUILTIN_UCMPD512,
28606 IX86_BUILTIN_UCMPQ512,
28607 IX86_BUILTIN_UNPCKHPD512,
28608 IX86_BUILTIN_UNPCKHPS512,
28609 IX86_BUILTIN_UNPCKLPD512,
28610 IX86_BUILTIN_UNPCKLPS512,
28611 IX86_BUILTIN_VCVTSD2SI32,
28612 IX86_BUILTIN_VCVTSD2SI64,
28613 IX86_BUILTIN_VCVTSD2USI32,
28614 IX86_BUILTIN_VCVTSD2USI64,
28615 IX86_BUILTIN_VCVTSS2SI32,
28616 IX86_BUILTIN_VCVTSS2SI64,
28617 IX86_BUILTIN_VCVTSS2USI32,
28618 IX86_BUILTIN_VCVTSS2USI64,
28619 IX86_BUILTIN_VCVTTSD2SI32,
28620 IX86_BUILTIN_VCVTTSD2SI64,
28621 IX86_BUILTIN_VCVTTSD2USI32,
28622 IX86_BUILTIN_VCVTTSD2USI64,
28623 IX86_BUILTIN_VCVTTSS2SI32,
28624 IX86_BUILTIN_VCVTTSS2SI64,
28625 IX86_BUILTIN_VCVTTSS2USI32,
28626 IX86_BUILTIN_VCVTTSS2USI64,
28627 IX86_BUILTIN_VFMADDPD512_MASK,
28628 IX86_BUILTIN_VFMADDPD512_MASK3,
28629 IX86_BUILTIN_VFMADDPD512_MASKZ,
28630 IX86_BUILTIN_VFMADDPS512_MASK,
28631 IX86_BUILTIN_VFMADDPS512_MASK3,
28632 IX86_BUILTIN_VFMADDPS512_MASKZ,
28633 IX86_BUILTIN_VFMADDSD3_ROUND,
28634 IX86_BUILTIN_VFMADDSS3_ROUND,
28635 IX86_BUILTIN_VFMADDSUBPD512_MASK,
28636 IX86_BUILTIN_VFMADDSUBPD512_MASK3,
28637 IX86_BUILTIN_VFMADDSUBPD512_MASKZ,
28638 IX86_BUILTIN_VFMADDSUBPS512_MASK,
28639 IX86_BUILTIN_VFMADDSUBPS512_MASK3,
28640 IX86_BUILTIN_VFMADDSUBPS512_MASKZ,
28641 IX86_BUILTIN_VFMSUBADDPD512_MASK3,
28642 IX86_BUILTIN_VFMSUBADDPS512_MASK3,
28643 IX86_BUILTIN_VFMSUBPD512_MASK3,
28644 IX86_BUILTIN_VFMSUBPS512_MASK3,
28645 IX86_BUILTIN_VFMSUBSD3_MASK3,
28646 IX86_BUILTIN_VFMSUBSS3_MASK3,
28647 IX86_BUILTIN_VFNMADDPD512_MASK,
28648 IX86_BUILTIN_VFNMADDPS512_MASK,
28649 IX86_BUILTIN_VFNMSUBPD512_MASK,
28650 IX86_BUILTIN_VFNMSUBPD512_MASK3,
28651 IX86_BUILTIN_VFNMSUBPS512_MASK,
28652 IX86_BUILTIN_VFNMSUBPS512_MASK3,
28653 IX86_BUILTIN_VPCLZCNTD512,
28654 IX86_BUILTIN_VPCLZCNTQ512,
28655 IX86_BUILTIN_VPCONFLICTD512,
28656 IX86_BUILTIN_VPCONFLICTQ512,
28657 IX86_BUILTIN_VPERMDF512,
28658 IX86_BUILTIN_VPERMDI512,
28659 IX86_BUILTIN_VPERMI2VARD512,
28660 IX86_BUILTIN_VPERMI2VARPD512,
28661 IX86_BUILTIN_VPERMI2VARPS512,
28662 IX86_BUILTIN_VPERMI2VARQ512,
28663 IX86_BUILTIN_VPERMILPD512,
28664 IX86_BUILTIN_VPERMILPS512,
28665 IX86_BUILTIN_VPERMILVARPD512,
28666 IX86_BUILTIN_VPERMILVARPS512,
28667 IX86_BUILTIN_VPERMT2VARD512,
28668 IX86_BUILTIN_VPERMT2VARD512_MASKZ,
28669 IX86_BUILTIN_VPERMT2VARPD512,
28670 IX86_BUILTIN_VPERMT2VARPD512_MASKZ,
28671 IX86_BUILTIN_VPERMT2VARPS512,
28672 IX86_BUILTIN_VPERMT2VARPS512_MASKZ,
28673 IX86_BUILTIN_VPERMT2VARQ512,
28674 IX86_BUILTIN_VPERMT2VARQ512_MASKZ,
28675 IX86_BUILTIN_VPERMVARDF512,
28676 IX86_BUILTIN_VPERMVARDI512,
28677 IX86_BUILTIN_VPERMVARSF512,
28678 IX86_BUILTIN_VPERMVARSI512,
28679 IX86_BUILTIN_VTERNLOGD512_MASK,
28680 IX86_BUILTIN_VTERNLOGD512_MASKZ,
28681 IX86_BUILTIN_VTERNLOGQ512_MASK,
28682 IX86_BUILTIN_VTERNLOGQ512_MASKZ,
28684 /* Mask arithmetic operations */
28685 IX86_BUILTIN_KAND16,
28686 IX86_BUILTIN_KANDN16,
28687 IX86_BUILTIN_KNOT16,
28688 IX86_BUILTIN_KOR16,
28689 IX86_BUILTIN_KORTESTC16,
28690 IX86_BUILTIN_KORTESTZ16,
28691 IX86_BUILTIN_KUNPCKBW,
28692 IX86_BUILTIN_KXNOR16,
28693 IX86_BUILTIN_KXOR16,
28694 IX86_BUILTIN_KMOV16,
28696 /* Alternate 4- and 8-element gather/scatter builtins for the vectorizer,
28697 where all operands are 32-byte or 64-byte wide respectively. */
28698 IX86_BUILTIN_GATHERALTSIV4DF,
28699 IX86_BUILTIN_GATHERALTDIV8SF,
28700 IX86_BUILTIN_GATHERALTSIV4DI,
28701 IX86_BUILTIN_GATHERALTDIV8SI,
28702 IX86_BUILTIN_GATHER3ALTDIV16SF,
28703 IX86_BUILTIN_GATHER3ALTDIV16SI,
28704 IX86_BUILTIN_GATHER3ALTSIV8DF,
28705 IX86_BUILTIN_GATHER3ALTSIV8DI,
28706 IX86_BUILTIN_GATHER3DIV16SF,
28707 IX86_BUILTIN_GATHER3DIV16SI,
28708 IX86_BUILTIN_GATHER3DIV8DF,
28709 IX86_BUILTIN_GATHER3DIV8DI,
28710 IX86_BUILTIN_GATHER3SIV16SF,
28711 IX86_BUILTIN_GATHER3SIV16SI,
28712 IX86_BUILTIN_GATHER3SIV8DF,
28713 IX86_BUILTIN_GATHER3SIV8DI,
28714 IX86_BUILTIN_SCATTERDIV16SF,
28715 IX86_BUILTIN_SCATTERDIV16SI,
28716 IX86_BUILTIN_SCATTERDIV8DF,
28717 IX86_BUILTIN_SCATTERDIV8DI,
28718 IX86_BUILTIN_SCATTERSIV16SF,
28719 IX86_BUILTIN_SCATTERSIV16SI,
28720 IX86_BUILTIN_SCATTERSIV8DF,
28721 IX86_BUILTIN_SCATTERSIV8DI,
28723 /* AVX512PF */
28724 IX86_BUILTIN_GATHERPFQPD,
28725 IX86_BUILTIN_GATHERPFDPS,
28726 IX86_BUILTIN_GATHERPFDPD,
28727 IX86_BUILTIN_GATHERPFQPS,
28728 IX86_BUILTIN_SCATTERPFDPD,
28729 IX86_BUILTIN_SCATTERPFDPS,
28730 IX86_BUILTIN_SCATTERPFQPD,
28731 IX86_BUILTIN_SCATTERPFQPS,
28733 /* AVX-512ER */
28734 IX86_BUILTIN_EXP2PD_MASK,
28735 IX86_BUILTIN_EXP2PS_MASK,
28736 IX86_BUILTIN_EXP2PS,
28737 IX86_BUILTIN_RCP28PD,
28738 IX86_BUILTIN_RCP28PS,
28739 IX86_BUILTIN_RCP28SD,
28740 IX86_BUILTIN_RCP28SS,
28741 IX86_BUILTIN_RSQRT28PD,
28742 IX86_BUILTIN_RSQRT28PS,
28743 IX86_BUILTIN_RSQRT28SD,
28744 IX86_BUILTIN_RSQRT28SS,
28746 /* SHA builtins. */
28747 IX86_BUILTIN_SHA1MSG1,
28748 IX86_BUILTIN_SHA1MSG2,
28749 IX86_BUILTIN_SHA1NEXTE,
28750 IX86_BUILTIN_SHA1RNDS4,
28751 IX86_BUILTIN_SHA256MSG1,
28752 IX86_BUILTIN_SHA256MSG2,
28753 IX86_BUILTIN_SHA256RNDS2,
28755 /* TFmode support builtins. */
28756 IX86_BUILTIN_INFQ,
28757 IX86_BUILTIN_HUGE_VALQ,
28758 IX86_BUILTIN_FABSQ,
28759 IX86_BUILTIN_COPYSIGNQ,
28761 /* Vectorizer support builtins. */
28762 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512,
28763 IX86_BUILTIN_CPYSGNPS,
28764 IX86_BUILTIN_CPYSGNPD,
28765 IX86_BUILTIN_CPYSGNPS256,
28766 IX86_BUILTIN_CPYSGNPS512,
28767 IX86_BUILTIN_CPYSGNPD256,
28768 IX86_BUILTIN_CPYSGNPD512,
28769 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512,
28770 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512,
28773 /* FMA4 instructions. */
28774 IX86_BUILTIN_VFMADDSS,
28775 IX86_BUILTIN_VFMADDSD,
28776 IX86_BUILTIN_VFMADDPS,
28777 IX86_BUILTIN_VFMADDPD,
28778 IX86_BUILTIN_VFMADDPS256,
28779 IX86_BUILTIN_VFMADDPD256,
28780 IX86_BUILTIN_VFMADDSUBPS,
28781 IX86_BUILTIN_VFMADDSUBPD,
28782 IX86_BUILTIN_VFMADDSUBPS256,
28783 IX86_BUILTIN_VFMADDSUBPD256,
28785 /* FMA3 instructions. */
28786 IX86_BUILTIN_VFMADDSS3,
28787 IX86_BUILTIN_VFMADDSD3,
28789 /* XOP instructions. */
28790 IX86_BUILTIN_VPCMOV,
28791 IX86_BUILTIN_VPCMOV_V2DI,
28792 IX86_BUILTIN_VPCMOV_V4SI,
28793 IX86_BUILTIN_VPCMOV_V8HI,
28794 IX86_BUILTIN_VPCMOV_V16QI,
28795 IX86_BUILTIN_VPCMOV_V4SF,
28796 IX86_BUILTIN_VPCMOV_V2DF,
28797 IX86_BUILTIN_VPCMOV256,
28798 IX86_BUILTIN_VPCMOV_V4DI256,
28799 IX86_BUILTIN_VPCMOV_V8SI256,
28800 IX86_BUILTIN_VPCMOV_V16HI256,
28801 IX86_BUILTIN_VPCMOV_V32QI256,
28802 IX86_BUILTIN_VPCMOV_V8SF256,
28803 IX86_BUILTIN_VPCMOV_V4DF256,
28805 IX86_BUILTIN_VPPERM,
28807 IX86_BUILTIN_VPMACSSWW,
28808 IX86_BUILTIN_VPMACSWW,
28809 IX86_BUILTIN_VPMACSSWD,
28810 IX86_BUILTIN_VPMACSWD,
28811 IX86_BUILTIN_VPMACSSDD,
28812 IX86_BUILTIN_VPMACSDD,
28813 IX86_BUILTIN_VPMACSSDQL,
28814 IX86_BUILTIN_VPMACSSDQH,
28815 IX86_BUILTIN_VPMACSDQL,
28816 IX86_BUILTIN_VPMACSDQH,
28817 IX86_BUILTIN_VPMADCSSWD,
28818 IX86_BUILTIN_VPMADCSWD,
28820 IX86_BUILTIN_VPHADDBW,
28821 IX86_BUILTIN_VPHADDBD,
28822 IX86_BUILTIN_VPHADDBQ,
28823 IX86_BUILTIN_VPHADDWD,
28824 IX86_BUILTIN_VPHADDWQ,
28825 IX86_BUILTIN_VPHADDDQ,
28826 IX86_BUILTIN_VPHADDUBW,
28827 IX86_BUILTIN_VPHADDUBD,
28828 IX86_BUILTIN_VPHADDUBQ,
28829 IX86_BUILTIN_VPHADDUWD,
28830 IX86_BUILTIN_VPHADDUWQ,
28831 IX86_BUILTIN_VPHADDUDQ,
28832 IX86_BUILTIN_VPHSUBBW,
28833 IX86_BUILTIN_VPHSUBWD,
28834 IX86_BUILTIN_VPHSUBDQ,
28836 IX86_BUILTIN_VPROTB,
28837 IX86_BUILTIN_VPROTW,
28838 IX86_BUILTIN_VPROTD,
28839 IX86_BUILTIN_VPROTQ,
28840 IX86_BUILTIN_VPROTB_IMM,
28841 IX86_BUILTIN_VPROTW_IMM,
28842 IX86_BUILTIN_VPROTD_IMM,
28843 IX86_BUILTIN_VPROTQ_IMM,
28845 IX86_BUILTIN_VPSHLB,
28846 IX86_BUILTIN_VPSHLW,
28847 IX86_BUILTIN_VPSHLD,
28848 IX86_BUILTIN_VPSHLQ,
28849 IX86_BUILTIN_VPSHAB,
28850 IX86_BUILTIN_VPSHAW,
28851 IX86_BUILTIN_VPSHAD,
28852 IX86_BUILTIN_VPSHAQ,
28854 IX86_BUILTIN_VFRCZSS,
28855 IX86_BUILTIN_VFRCZSD,
28856 IX86_BUILTIN_VFRCZPS,
28857 IX86_BUILTIN_VFRCZPD,
28858 IX86_BUILTIN_VFRCZPS256,
28859 IX86_BUILTIN_VFRCZPD256,
28861 IX86_BUILTIN_VPCOMEQUB,
28862 IX86_BUILTIN_VPCOMNEUB,
28863 IX86_BUILTIN_VPCOMLTUB,
28864 IX86_BUILTIN_VPCOMLEUB,
28865 IX86_BUILTIN_VPCOMGTUB,
28866 IX86_BUILTIN_VPCOMGEUB,
28867 IX86_BUILTIN_VPCOMFALSEUB,
28868 IX86_BUILTIN_VPCOMTRUEUB,
28870 IX86_BUILTIN_VPCOMEQUW,
28871 IX86_BUILTIN_VPCOMNEUW,
28872 IX86_BUILTIN_VPCOMLTUW,
28873 IX86_BUILTIN_VPCOMLEUW,
28874 IX86_BUILTIN_VPCOMGTUW,
28875 IX86_BUILTIN_VPCOMGEUW,
28876 IX86_BUILTIN_VPCOMFALSEUW,
28877 IX86_BUILTIN_VPCOMTRUEUW,
28879 IX86_BUILTIN_VPCOMEQUD,
28880 IX86_BUILTIN_VPCOMNEUD,
28881 IX86_BUILTIN_VPCOMLTUD,
28882 IX86_BUILTIN_VPCOMLEUD,
28883 IX86_BUILTIN_VPCOMGTUD,
28884 IX86_BUILTIN_VPCOMGEUD,
28885 IX86_BUILTIN_VPCOMFALSEUD,
28886 IX86_BUILTIN_VPCOMTRUEUD,
28888 IX86_BUILTIN_VPCOMEQUQ,
28889 IX86_BUILTIN_VPCOMNEUQ,
28890 IX86_BUILTIN_VPCOMLTUQ,
28891 IX86_BUILTIN_VPCOMLEUQ,
28892 IX86_BUILTIN_VPCOMGTUQ,
28893 IX86_BUILTIN_VPCOMGEUQ,
28894 IX86_BUILTIN_VPCOMFALSEUQ,
28895 IX86_BUILTIN_VPCOMTRUEUQ,
28897 IX86_BUILTIN_VPCOMEQB,
28898 IX86_BUILTIN_VPCOMNEB,
28899 IX86_BUILTIN_VPCOMLTB,
28900 IX86_BUILTIN_VPCOMLEB,
28901 IX86_BUILTIN_VPCOMGTB,
28902 IX86_BUILTIN_VPCOMGEB,
28903 IX86_BUILTIN_VPCOMFALSEB,
28904 IX86_BUILTIN_VPCOMTRUEB,
28906 IX86_BUILTIN_VPCOMEQW,
28907 IX86_BUILTIN_VPCOMNEW,
28908 IX86_BUILTIN_VPCOMLTW,
28909 IX86_BUILTIN_VPCOMLEW,
28910 IX86_BUILTIN_VPCOMGTW,
28911 IX86_BUILTIN_VPCOMGEW,
28912 IX86_BUILTIN_VPCOMFALSEW,
28913 IX86_BUILTIN_VPCOMTRUEW,
28915 IX86_BUILTIN_VPCOMEQD,
28916 IX86_BUILTIN_VPCOMNED,
28917 IX86_BUILTIN_VPCOMLTD,
28918 IX86_BUILTIN_VPCOMLED,
28919 IX86_BUILTIN_VPCOMGTD,
28920 IX86_BUILTIN_VPCOMGED,
28921 IX86_BUILTIN_VPCOMFALSED,
28922 IX86_BUILTIN_VPCOMTRUED,
28924 IX86_BUILTIN_VPCOMEQQ,
28925 IX86_BUILTIN_VPCOMNEQ,
28926 IX86_BUILTIN_VPCOMLTQ,
28927 IX86_BUILTIN_VPCOMLEQ,
28928 IX86_BUILTIN_VPCOMGTQ,
28929 IX86_BUILTIN_VPCOMGEQ,
28930 IX86_BUILTIN_VPCOMFALSEQ,
28931 IX86_BUILTIN_VPCOMTRUEQ,
28933 /* LWP instructions. */
28934 IX86_BUILTIN_LLWPCB,
28935 IX86_BUILTIN_SLWPCB,
28936 IX86_BUILTIN_LWPVAL32,
28937 IX86_BUILTIN_LWPVAL64,
28938 IX86_BUILTIN_LWPINS32,
28939 IX86_BUILTIN_LWPINS64,
28941 IX86_BUILTIN_CLZS,
28943 /* RTM */
28944 IX86_BUILTIN_XBEGIN,
28945 IX86_BUILTIN_XEND,
28946 IX86_BUILTIN_XABORT,
28947 IX86_BUILTIN_XTEST,
28949 /* MPX */
28950 IX86_BUILTIN_BNDMK,
28951 IX86_BUILTIN_BNDSTX,
28952 IX86_BUILTIN_BNDLDX,
28953 IX86_BUILTIN_BNDCL,
28954 IX86_BUILTIN_BNDCU,
28955 IX86_BUILTIN_BNDRET,
28956 IX86_BUILTIN_BNDNARROW,
28957 IX86_BUILTIN_BNDINT,
28958 IX86_BUILTIN_SIZEOF,
28959 IX86_BUILTIN_BNDLOWER,
28960 IX86_BUILTIN_BNDUPPER,
28962 /* BMI instructions. */
28963 IX86_BUILTIN_BEXTR32,
28964 IX86_BUILTIN_BEXTR64,
28965 IX86_BUILTIN_CTZS,
28967 /* TBM instructions. */
28968 IX86_BUILTIN_BEXTRI32,
28969 IX86_BUILTIN_BEXTRI64,
28971 /* BMI2 instructions. */
28972 IX86_BUILTIN_BZHI32,
28973 IX86_BUILTIN_BZHI64,
28974 IX86_BUILTIN_PDEP32,
28975 IX86_BUILTIN_PDEP64,
28976 IX86_BUILTIN_PEXT32,
28977 IX86_BUILTIN_PEXT64,
28979 /* ADX instructions. */
28980 IX86_BUILTIN_ADDCARRYX32,
28981 IX86_BUILTIN_ADDCARRYX64,
28983 /* FSGSBASE instructions. */
28984 IX86_BUILTIN_RDFSBASE32,
28985 IX86_BUILTIN_RDFSBASE64,
28986 IX86_BUILTIN_RDGSBASE32,
28987 IX86_BUILTIN_RDGSBASE64,
28988 IX86_BUILTIN_WRFSBASE32,
28989 IX86_BUILTIN_WRFSBASE64,
28990 IX86_BUILTIN_WRGSBASE32,
28991 IX86_BUILTIN_WRGSBASE64,
28993 /* RDRND instructions. */
28994 IX86_BUILTIN_RDRAND16_STEP,
28995 IX86_BUILTIN_RDRAND32_STEP,
28996 IX86_BUILTIN_RDRAND64_STEP,
28998 /* RDSEED instructions. */
28999 IX86_BUILTIN_RDSEED16_STEP,
29000 IX86_BUILTIN_RDSEED32_STEP,
29001 IX86_BUILTIN_RDSEED64_STEP,
29003 /* F16C instructions. */
29004 IX86_BUILTIN_CVTPH2PS,
29005 IX86_BUILTIN_CVTPH2PS256,
29006 IX86_BUILTIN_CVTPS2PH,
29007 IX86_BUILTIN_CVTPS2PH256,
29009 /* CFString built-in for darwin */
29010 IX86_BUILTIN_CFSTRING,
29012 /* Builtins to get CPU type and supported features. */
29013 IX86_BUILTIN_CPU_INIT,
29014 IX86_BUILTIN_CPU_IS,
29015 IX86_BUILTIN_CPU_SUPPORTS,
29017 /* Read/write FLAGS register built-ins. */
29018 IX86_BUILTIN_READ_FLAGS,
29019 IX86_BUILTIN_WRITE_FLAGS,
29021 IX86_BUILTIN_MAX
29022 };
29024 /* Table for the ix86 builtin decls. */
29025 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
29027 /* Table of all the builtin functions that are possible with different ISAs
29028 but are waiting to be built until a function is declared to use that
29029 ISA. */
29030 struct builtin_isa {
29031 const char *name; /* function name */
29032 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
29033 HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
29034 bool const_p; /* true if the declaration is constant */
29035 bool leaf_p; /* true if the declaration has leaf attribute */
29036 bool nothrow_p; /* true if the declaration has nothrow attribute */
29037 bool set_and_not_built_p; /* true if the builtin is recorded but its decl is not built yet */
29038 };
29040 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
29043 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the
29044 MASK of isa_flags to use in the ix86_builtins_isa array. Store the
29045 function decl in the ix86_builtins array. Return the function decl, or
29046 NULL_TREE if the builtin was not added.
29048 If the front end has a special hook for builtin functions, delay adding
29049 builtin functions that aren't in the current ISA until the ISA is changed
29050 with function specific optimization. Doing so can save about 300K for the
29051 default compiler. When the builtin is expanded, check at that time whether
29052 it is valid.
29054 If the front end doesn't have a special hook, record all builtins, even
29055 those that aren't in the current ISA, in case the user uses function
29056 specific options for a different ISA, so that we don't get scope
29057 errors if a builtin is added in the middle of a function scope. */
29059 static inline tree
29060 def_builtin (HOST_WIDE_INT mask, const char *name,
29061 enum ix86_builtin_func_type tcode,
29062 enum ix86_builtins code)
29063 {
29064   tree decl = NULL_TREE;
29066   if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
29067     {
29068       ix86_builtins_isa[(int) code].isa = mask;
29070       mask &= ~OPTION_MASK_ISA_64BIT;
29071       if (mask == 0
29072           || (mask & ix86_isa_flags) != 0
29073           || (lang_hooks.builtin_function
29074               == lang_hooks.builtin_function_ext_scope))
29076         {
29077           tree type = ix86_get_builtin_func_type (tcode);
29078           decl = add_builtin_function (name, type, code, BUILT_IN_MD,
29079                                        NULL, NULL_TREE);
29080           ix86_builtins[(int) code] = decl;
29081           ix86_builtins_isa[(int) code].set_and_not_built_p = false;
29082         }
29083       else
29084         {
29085           ix86_builtins[(int) code] = NULL_TREE;
29086           ix86_builtins_isa[(int) code].tcode = tcode;
29087           ix86_builtins_isa[(int) code].name = name;
29088           ix86_builtins_isa[(int) code].leaf_p = false;
29089           ix86_builtins_isa[(int) code].nothrow_p = false;
29090           ix86_builtins_isa[(int) code].const_p = false;
29091           ix86_builtins_isa[(int) code].set_and_not_built_p = true;
29092         }
29093     }
29095   return decl;
29096 }
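/* Illustrative sketch only (not part of the original file): a hypothetical
   def_builtin call.  "__builtin_ia32_example" and IX86_BUILTIN_EXAMPLE are
   made-up placeholders; the real registrations happen later in this file.  */
#if 0
static void
example_def_builtin_usage (void)
{
  /* If SSE2 is already in ix86_isa_flags (or the front end registers
     builtins at extern scope), the decl is built immediately; otherwise it
     is only recorded in ix86_builtins_isa and built later by
     ix86_add_new_builtins.  */
  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_example",
               INT_FTYPE_INT, IX86_BUILTIN_EXAMPLE);
}
#endif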
29098 /* Like def_builtin, but also marks the function decl "const". */
29100 static inline tree
29101 def_builtin_const (HOST_WIDE_INT mask, const char *name,
29102 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
29103 {
29104   tree decl = def_builtin (mask, name, tcode, code);
29105   if (decl)
29106     TREE_READONLY (decl) = 1;
29107   else
29108     ix86_builtins_isa[(int) code].const_p = true;
29110   return decl;
29111 }
29113 /* Add any new builtin functions for a given ISA that may not have been
29114 declared. This saves a bit of space compared to adding all of the
29115 declarations to the tree, even if we didn't use them. */
29117 static void
29118 ix86_add_new_builtins (HOST_WIDE_INT isa)
29119 {
29120   int i;
29122   for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
29123     {
29124       if ((ix86_builtins_isa[i].isa & isa) != 0
29125           && ix86_builtins_isa[i].set_and_not_built_p)
29126         {
29127           tree decl, type;
29129           /* Don't define the builtin again. */
29130           ix86_builtins_isa[i].set_and_not_built_p = false;
29132           type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
29133           decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
29134                                                  type, i, BUILT_IN_MD, NULL,
29135                                                  NULL_TREE);
29137           ix86_builtins[i] = decl;
29138           if (ix86_builtins_isa[i].const_p)
29139             TREE_READONLY (decl) = 1;
29140           if (ix86_builtins_isa[i].leaf_p)
29141             DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
29142                                                       NULL_TREE);
29143           if (ix86_builtins_isa[i].nothrow_p)
29144             TREE_NOTHROW (decl) = 1;
29145         }
29146     }
29147 }
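/* Illustrative sketch only (not part of the original file): once a
   function-specific option enables an additional ISA, the decls that
   def_builtin merely recorded can be materialized like this.  The AVX2
   mask is just an example value.  */
#if 0
static void
example_add_new_builtins (void)
{
  ix86_add_new_builtins (OPTION_MASK_ISA_AVX2);
}
#endif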
29149 /* Bits for builtin_description.flag. */
29151 /* Set when we don't support the comparison natively, and should
29152 swap the comparison operands in order to support it. */
29153 #define BUILTIN_DESC_SWAP_OPERANDS 1
29155 struct builtin_description
29156 {
29157 const HOST_WIDE_INT mask;
29158 const enum insn_code icode;
29159 const char *const name;
29160 const enum ix86_builtins code;
29161 const enum rtx_code comparison;
29162 const int flag;
29163 };
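/* Illustrative sketch only (not part of the original file): how one entry
   of the bdesc_* tables below maps onto struct builtin_description, using
   the existing __builtin_ia32_comieq record from bdesc_comi.  */
#if 0
static const struct builtin_description example_bdesc[] =
{
  { OPTION_MASK_ISA_SSE,         /* mask: ISA flags the builtin requires */
    CODE_FOR_sse_comi,           /* icode: insn pattern used to expand it */
    "__builtin_ia32_comieq",     /* name exposed to the front end */
    IX86_BUILTIN_COMIEQSS,       /* code: enum ix86_builtins value */
    UNEQ,                        /* comparison: rtx comparison code, if any */
    0 },                         /* flag: extra data (often a function type) */
};
#endif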
29165 static const struct builtin_description bdesc_comi[] =
29166 {
29167 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
29168 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
29169 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
29170 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
29171 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
29172 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
29173 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
29174 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
29175 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
29176 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
29177 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
29178 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
29179 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
29180 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
29181 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
29182 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
29183 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
29184 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
29185 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
29186 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
29187 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
29188 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
29189 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
29190 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
29193 static const struct builtin_description bdesc_pcmpestr[] =
29195 /* SSE4.2 */
29196 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
29197 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
29198 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
29199 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
29200 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
29201 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
29202 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
29203 };
29205 static const struct builtin_description bdesc_pcmpistr[] =
29206 {
29207 /* SSE4.2 */
29208 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
29209 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
29210 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
29211 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
29212 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
29213 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
29214 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
29215 };
29217 /* Special builtins with variable number of arguments. */
29218 static const struct builtin_description bdesc_special_args[] =
29219 {
29220 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
29221 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
29222 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
29224 /* 80387 (for use internally for atomic compound assignment). */
29225 { 0, CODE_FOR_fnstenv, "__builtin_ia32_fnstenv", IX86_BUILTIN_FNSTENV, UNKNOWN, (int) VOID_FTYPE_PVOID },
29226 { 0, CODE_FOR_fldenv, "__builtin_ia32_fldenv", IX86_BUILTIN_FLDENV, UNKNOWN, (int) VOID_FTYPE_PCVOID },
29227 { 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKNOWN, (int) VOID_FTYPE_PUSHORT },
29228 { 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, UNKNOWN, (int) VOID_FTYPE_VOID },
29230 /* MMX */
29231 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
29233 /* 3DNow! */
29234 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
29236 /* FXSR, XSAVE and XSAVEOPT */
29237 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID },
29238 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID },
29239 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29240 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29241 { OPTION_MASK_ISA_XSAVEOPT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29243 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID },
29244 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID },
29245 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29246 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29247 { OPTION_MASK_ISA_XSAVEOPT | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29249 /* SSE */
29250 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
29251 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
29252 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
29254 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
29255 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
29256 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
29257 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
29259 /* SSE or 3DNow!A */
29260 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
29261 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
29263 /* SSE2 */
29264 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
29265 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
29266 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
29267 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedquv16qi, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
29268 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
29269 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
29270 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
29271 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
29272 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
29273 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddquv16qi, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
29275 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
29276 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
29278 /* SSE3 */
29279 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
29281 /* SSE4.1 */
29282 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
29284 /* SSE4A */
29285 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
29286 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
29288 /* AVX */
29289 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
29290 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
29292 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
29293 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
29294 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
29295 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
29296 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
29298 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
29299 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
29300 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
29301 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
29302 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddquv32qi, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
29303 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedquv32qi, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
29304 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
29306 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
29307 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
29308 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
29310 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
29311 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
29312 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
29313 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
29314 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
29315 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
29316 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
29317 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
29319 /* AVX2 */
29320 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
29321 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
29322 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
29323 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
29324 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
29325 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
29326 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
29327 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
29328 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
29330 /* AVX512F */
29331 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29332 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29333 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29334 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29335 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29336 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29337 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29338 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29339 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29340 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29341 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29342 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29343 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29344 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29345 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadupd512_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29346 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadups512_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29347 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29348 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29349 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29350 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29351 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF },
29352 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF },
29353 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI },
29354 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI },
29355 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29356 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29357 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeupd512_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29358 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask_store, "__builtin_ia32_pmovusqd512mem_mask", IX86_BUILTIN_PMOVUSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29359 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask_store, "__builtin_ia32_pmovsqd512mem_mask", IX86_BUILTIN_PMOVSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29360 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask_store, "__builtin_ia32_pmovqd512mem_mask", IX86_BUILTIN_PMOVQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29361 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovusqw512mem_mask", IX86_BUILTIN_PMOVUSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29362 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovsqw512mem_mask", IX86_BUILTIN_PMOVSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29363 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovqw512mem_mask", IX86_BUILTIN_PMOVQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29364 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovusdw512mem_mask", IX86_BUILTIN_PMOVUSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29365 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovsdw512mem_mask", IX86_BUILTIN_PMOVSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29366 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovdw512mem_mask", IX86_BUILTIN_PMOVDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29367 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovqb512mem_mask", IX86_BUILTIN_PMOVQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29368 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovusqb512mem_mask", IX86_BUILTIN_PMOVUSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29369 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovsqb512mem_mask", IX86_BUILTIN_PMOVSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29370 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29371 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29372 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29373 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeups512_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29374 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29375 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29376 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29377 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29379 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
29380 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
29381 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
29382 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
29383 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
29384 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
29386 /* FSGSBASE */
29387 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29388 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
29389 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29390 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
29391 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
29392 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
29393 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
29394 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
29396 /* RTM */
29397 { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29398 { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
29399 { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
29400 };
29402 /* Builtins with variable number of arguments. */
29403 static const struct builtin_description bdesc_args[] =
29404 {
29405 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
29406 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
29407 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
29408 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
29409 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
29410 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
29411 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
29413 /* MMX */
29414 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29415 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29416 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29417 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29418 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29419 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29421 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29422 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29423 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29424 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29425 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29426 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29427 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29428 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29430 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29431 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29433 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29434 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29435 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29436 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29438 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29439 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29440 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29441 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29442 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29443 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29445 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29446 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29447 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29448 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29449 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
29450 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
29452 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
29453 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
29454 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
29456 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
29458 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29459 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29460 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
29461 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29462 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29463 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
29465 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29466 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29467 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
29468 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29469 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29470 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
29472 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29473 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29474 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29475 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29477 /* 3DNow! */
29478 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
29479 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
29480 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29481 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29483 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29484 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29485 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29486 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29487 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29488 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29489 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29490 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29491 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29492 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29493 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29494 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29495 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29496 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29497 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29499 /* 3DNow!A */
29500 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
29501 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
29502 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
29503 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29504 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29505 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29507 /* SSE */
29508 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
29509 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29510 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29511 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29512 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29513 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29514 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
29515 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
29516 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
29517 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
29518 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
29519 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
29521 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29523 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29524 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29525 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29526 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29527 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29528 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29529 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29530 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
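/* The compare builtins below all expand through the single
   sse_maskcmpv4sf3 (or sse_vmmaskcmpv4sf3) pattern, with the rtx code in
   the fifth field selecting the predicate.  CMPPS only encodes
   LT/LE-style predicates, so cmpgtps/cmpgeps reuse LT/LE with the
   ..._SWAP suffix, which presumably swaps the operands at expansion
   time.  */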
29532 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
29533 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
29534 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
29535 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29536 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29537 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29538 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
29539 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
29540 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
29541 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29542 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29543 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29544 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
29545 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
29546 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
29547 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29548 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
29549 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
29550 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
29551 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29553 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29554 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29555 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29556 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29558 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29559 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29560 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29561 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29563 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29565 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29566 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29567 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29568 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29569 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29571 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
29572 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
29573 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
29575 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
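/* ..._VEC_MERGE: sqrtss, rsqrtss and rcpss are unary at the source
   level, but the vm* patterns also need the vector whose upper elements
   are preserved; the suffix presumably makes the expander pass the
   single argument in both operand slots.  */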
29577 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
29578 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
29579 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
29581 { OPTION_MASK_ISA_SSE, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
29582 { OPTION_MASK_ISA_SSE, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
29584 /* SSE MMX or 3DNow!A */
29585 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29586 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29587 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29589 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29590 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29591 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29592 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29594 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
29595 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
29597 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
29599 /* SSE2 */
29600 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29602 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
29603 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
29604 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
29605 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
29606 { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
29608 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
29609 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
29610 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
29611 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
29612 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
29614 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
29616 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
29617 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
29618 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
29619 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
29621 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_fix_notruncv4sfv4si, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
29622 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
29623 { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
29625 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29626 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29627 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29628 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29629 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29630 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29631 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29632 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29634 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
29635 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
29636 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
29637 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29638 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29639 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29640 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
29641 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
29642 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
29643 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29644 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29645 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29646 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
29647 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
29648 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
29649 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29650 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
29651 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
29652 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
29653 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29655 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29656 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29657 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29658 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29660 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29661 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29662 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29663 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29665 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29667 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29668 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29669 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29671 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
29673 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29674 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29675 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29676 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29677 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29678 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29679 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29680 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29682 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29683 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29684 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29685 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29686 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29687 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29688 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29689 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29691 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29692 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29694 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29695 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29696 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29697 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29699 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29700 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29702 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29703 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29704 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29705 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29706 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29707 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29709 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29710 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29711 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29712 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29714 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29715 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29716 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29717 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29718 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29719 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29720 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29721 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29723 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
29724 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
29725 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
29727 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29728 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
29730 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
29731 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
29733 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
29735 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
29736 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
29737 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
29738 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
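/* ..._INT_CONVERT: pslldqi128/psrldqi128 treat the register as a single
   128-bit value and expand through the V1TImode shift patterns even
   though the builtin prototype uses V2DI; the suffix presumably asks the
   expander to handle that mode change.  */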
29740 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
29741 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29742 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29743 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
29744 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29745 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
29746 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
29748 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
29749 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29750 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29751 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
29752 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29753 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
29754 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
29756 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29757 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29758 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29759 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
29761 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
29762 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
29763 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
29765 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
29767 { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
29769 /* SSE2 MMX */
29770 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
29771 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
29773 /* SSE3 */
29774 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29775 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29777 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29778 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29779 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29780 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29781 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29782 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29784 /* SSSE3 */
29785 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
29786 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
29787 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
29788 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
29789 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
29790 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
29792 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29793 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29794 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29795 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29796 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29797 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29798 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29799 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29800 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29801 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29802 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29803 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29804 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
29805 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
29806 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29807 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29808 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29809 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29810 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29811 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29812 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29813 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29814 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29815 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29817 /* SSSE3. */
29818 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
29819 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
29821 /* SSE4.1 */
29822 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29823 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29824 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
29825 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
29826 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29827 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29828 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29829 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
29830 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
29831 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
29833 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
29834 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
29835 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
29836 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
29837 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
29838 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
29839 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
29840 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
29841 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
29842 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
29843 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
29844 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
29845 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
29847 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
29848 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29849 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29850 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29851 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29852 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29853 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29854 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29855 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29856 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29857 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
29858 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29860 /* SSE4.1 */
29861 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
29862 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
29863 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29864 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
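/* floorpd/ceilpd/truncpd/rintpd and the ps variants below reuse the
   sse4_1_round* patterns of roundpd/roundps; instead of a comparison
   code, the fifth field carries the fixed rounding-mode constant
   (ROUND_FLOOR, ROUND_CEIL, ...) which presumably becomes the immediate
   operand at expansion time.  */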
29866 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
29867 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
29868 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
29869 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
29871 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
29872 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
29874 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
29875 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
29877 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
29878 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
29879 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
29880 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
29882 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
29883 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },
29885 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29886 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
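/* The three ptest builtins share one pattern; the EQ/LTU/GTU codes
   presumably select which flag the result is taken from: ZF for ptestz,
   CF for ptestc, and "neither ZF nor CF set" for ptestnzc.  */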
29888 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
29889 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
29890 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
29892 /* SSE4.2 */
29893 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29894 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
29895 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
29896 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29897 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29899 /* SSE4A */
29900 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
29901 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
29902 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
29903 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
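/* The AES and PCLMUL entries that follow have a zero name field: they
   are presumably registered elsewhere under their real names and
   AES/PCLMUL option masks, this table supplying only the insn code and
   prototype used at expansion time (the FABSQ/COPYSIGNQ entries above
   work the same way).  */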
29905 /* AES */
29906 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
29907 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
29909 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29910 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29911 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29912 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29914 /* PCLMUL */
29915 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
29917 /* AVX */
29918 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29919 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29920 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29921 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29922 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29923 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29924 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29925 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29926 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29927 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29928 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29929 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29930 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29931 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29932 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29933 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29934 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29935 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29936 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29937 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29938 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29939 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29940 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29941 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29942 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29943 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29945 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
29946 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
29947 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
29948 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
29950 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29951 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29952 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
29953 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
29954 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29955 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29956 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29957 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29958 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29959 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29960 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29961 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29962 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29963 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
29964 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
29965 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
29966 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
29967 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
29968 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
29969 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_fix_notruncv8sfv8si, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
29970 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
29971 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
29972 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
29973 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
29974 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29975 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29976 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
29977 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
29978 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
29979 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
29980 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
29981 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
29982 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
29983 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
29985 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29986 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29987 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
29989 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
29990 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29991 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29992 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29993 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29995 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29997 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
29998 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
30000 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
30001 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
30002 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
30003 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
30005 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
30006 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
30008 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
30009 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
30011 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
30012 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
30013 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
30014 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
30016 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
30017 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },
30019 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
30020 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
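/* Note the (enum rtx_code) casts above: the floor/ceil/trunc/rint entries
   reuse the avx_roundpd256/avx_roundps256 patterns and borrow the
   comparison field to carry the ROUND_* rounding-mode constant, which
   supplies the rounding immediate when the builtin is expanded.  */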
30022 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
30023 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
30024 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
30025 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
30027 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
30028 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
30029 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
30030 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
30031 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
30032 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
30034 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
30035 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
30036 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
30037 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
30038 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
30039 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
30040 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
30041 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
30042 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
30043 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
30044 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
30045 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
30046 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
30047 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
30048 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
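/* For the vtest/ptest entries the comparison field selects which flag of
   the (v)ptest result is tested: EQ for the ...z variants (ZF), LTU for
   the ...c variants (CF) and GTU for the ...nzc variants.  */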
30050 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
30051 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
30053 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
30054 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
30056 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
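/* Each entry gives the ISA option mask that must be enabled, the insn
   pattern used to expand the builtin, the builtin's name, its
   IX86_BUILTIN_* code, an rtx comparison code (UNKNOWN when unused) and
   the function-type enumerator describing the prototype.  */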
30058 /* AVX2 */
30059 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
30060 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
30061 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
30062 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
30063 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
30064 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
30065 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
30066 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
30067 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30068 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30069 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30070 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
30071 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30072 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30073 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30074 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30075 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
30076 { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
30077 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
30078 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30079 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30080 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
30081 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
30082 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30083 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30084 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30085 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
30086 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30087 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30088 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30089 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
30090 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30091 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30092 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30093 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30094 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30095 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30096 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
30097 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
30098 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30099 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30100 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3 , "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30101 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30102 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30103 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3 , "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30104 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30105 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30106 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3 , "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30107 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30108 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30109 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3 , "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30110 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
30111 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
30112 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2 , "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
30113 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2 , "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
30114 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2 , "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
30115 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2 , "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
30116 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2 , "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
30117 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
30118 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2 , "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
30119 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2 , "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
30120 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2 , "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
30121 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2 , "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
30122 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2 , "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
30123 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_even_v8si, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
30124 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmulhrswv16hi3 , "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30125 { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30126 { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30127 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30128 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256 , UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30129 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
30130 { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
30131 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
30132 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30133 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
30134 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
30135 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
30136 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30137 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30138 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3 , "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30139 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
30140 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
30141 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
30142 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
30143 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
30144 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
30145 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
30146 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
30147 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
30148 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
30149 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
30150 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
30151 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
30152 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
30153 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
30154 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
30155 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
30156 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
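/* The paired shift entries above share one shift pattern; the _COUNT
   suffix in the prototype distinguishes the immediate/scalar-count form
   (..._SI_COUNT, ..._INT_COUNT) from the form that takes the count in
   the low part of an XMM operand (..._V8HI_COUNT, ..._V4SI_COUNT,
   ..._V2DI_COUNT).  */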
30157 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30158 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30159 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30160 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
30161 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30162 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30163 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30164 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30165 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30166 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30167 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30168 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
30169 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30170 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30171 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30172 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
30173 { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
30174 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
30175 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
30176 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
30177 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
30178 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
30179 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
30180 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
30181 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
30182 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
30183 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
30184 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
30185 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
30186 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
30187 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
30188 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30189 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
30190 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
30191 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
30192 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
30193 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_extracti128, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
30194 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_inserti128, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
30195 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
30196 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
30197 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30198 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30199 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30200 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30201 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
30202 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
30203 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30204 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
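/* LZCNT */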
30206 { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
30208 /* BMI */
30209 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
30210 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
30211 { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
30213 /* TBM */
30214 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
30215 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
30217 /* F16C */
30218 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
30219 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
30220 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
30221 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
30223 /* BMI2 */
30224 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
30225 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
30226 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
30227 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
30228 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
30229 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
30231 /* AVX512F */
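/* In the _mask entries the last two prototype arguments are the merge
   (pass-through) source and the write-mask: QI masks for 8-element and
   HI masks for 16-element vectors.  */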
30232 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
30233 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
30234 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30235 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30236 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30237 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30238 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
30239 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_QI },
30240 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
30241 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_QI },
30242 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
30243 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
30244 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
30245 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
30246 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30247 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30248 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
30249 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtps2ph512_mask, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_HI },
30250 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv8siv8df_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
30251 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT },
30252 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30253 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30254 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30255 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30256 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_QI },
30257 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_QI },
30258 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_QI },
30259 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_QI },
30260 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI },
30261 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI },
30262 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI },
30263 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI },
30264 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30265 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30266 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30267 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30268 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30269 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30270 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30271 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30272 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30273 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30274 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30275 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30276 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30277 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30278 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30279 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
30280 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_HI },
30281 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_QI },
30282 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_HI },
30283 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
30284 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
30285 { OPTION_MASK_ISA_AVX512F & ~OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_memv8di_mask, "__builtin_ia32_pbroadcastq512_mem_mask", IX86_BUILTIN_PBROADCASTQ512_MEM, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
30286 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30287 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30288 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30289 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30290 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30291 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30292 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30293 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30294 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30295 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30296 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30297 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30298 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30299 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30300 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30301 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30302 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30303 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30304 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
30305 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
30306 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
30307 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
30308 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
30309 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
30310 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
30311 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
30312 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
30313 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
30314 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
30315 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
30316 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
30317 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, "__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
30318 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
30319 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
30320 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
30321 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
30322 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
30323 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
30324 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
30325 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
30326 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
30327 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
30328 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
30329 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
30330 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask" , IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30331 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
30332 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30333 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30334 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30335 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30336 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30337 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30338 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30339 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30340 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30341 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30342 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30343 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
30344 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30345 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
30346 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30347 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30348 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30349 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
30350 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30351 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
30352 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30353 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30354 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30355 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
30356 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30357 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
30358 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30359 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30360 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30361 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30362 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30363 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30364 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30365 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30366 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30367 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30368 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30369 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30370 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30371 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30372 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30373 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30374 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30375 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v2df, "__builtin_ia32_rcp14sd", IX86_BUILTIN_RCP14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30376 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v4sf, "__builtin_ia32_rcp14ss", IX86_BUILTIN_RCP14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30377 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30378 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30379 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v2df, "__builtin_ia32_rsqrt14sd", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30380 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v4sf, "__builtin_ia32_rsqrt14ss", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30381 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
30382 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
30383 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
30384 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
30385 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
30386 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
30387 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
30388 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
30389 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
30390 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
30391 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
30392 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
30393 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30394 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30395 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30396 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30397 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
30398 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30399 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30400 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30401 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30402 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30403 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
30404 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
30405 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30406 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30407 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30408 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30409 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
30410 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
30411 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
30412 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
30413 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30414 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30415 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30416 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30417 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30418 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30419 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
30420 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
30421 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
30422 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
30424 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF },
30425 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF },
30426 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF },
30427 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF },
30428 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf, "__builtin_ia32_exp2ps", IX86_BUILTIN_EXP2PS, UNKNOWN, (int) V16SF_FTYPE_V16SF },
30429 { OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF },
30430 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
30431 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
30433 /* Mask arithmetic operations */
30434 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andhi3, "__builtin_ia32_kandhi", IX86_BUILTIN_KAND16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30435 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kandnhi, "__builtin_ia32_kandnhi", IX86_BUILTIN_KANDN16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30436 { OPTION_MASK_ISA_AVX512F, CODE_FOR_one_cmplhi2, "__builtin_ia32_knothi", IX86_BUILTIN_KNOT16, UNKNOWN, (int) HI_FTYPE_HI },
30437 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorhi3, "__builtin_ia32_korhi", IX86_BUILTIN_KOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30438 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestchi, "__builtin_ia32_kortestchi", IX86_BUILTIN_KORTESTC16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30439 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestzhi, "__builtin_ia32_kortestzhi", IX86_BUILTIN_KORTESTZ16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30440 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kunpckhi, "__builtin_ia32_kunpckhi", IX86_BUILTIN_KUNPCKBW, UNKNOWN, (int) HI_FTYPE_HI_HI },
30441 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kxnorhi, "__builtin_ia32_kxnorhi", IX86_BUILTIN_KXNOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30442 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorhi3, "__builtin_ia32_kxorhi", IX86_BUILTIN_KXOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30443 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kmovw, "__builtin_ia32_kmov16", IX86_BUILTIN_KMOV16, UNKNOWN, (int) HI_FTYPE_HI },
30445 /* SHA */
30446 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg1, 0, IX86_BUILTIN_SHA1MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30447 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg2, 0, IX86_BUILTIN_SHA1MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30448 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1nexte, 0, IX86_BUILTIN_SHA1NEXTE, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30449 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1rnds4, 0, IX86_BUILTIN_SHA1RNDS4, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
30450 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg1, 0, IX86_BUILTIN_SHA256MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30451 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg2, 0, IX86_BUILTIN_SHA256MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30452 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256rnds2, 0, IX86_BUILTIN_SHA256RNDS2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
30453 };
30455 /* Builtins with rounding support. */
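/* Each row of bdesc_round_args uses the same field order as the other
   descriptor tables in this file: ISA mask, insn pattern, builtin name,
   IX86_BUILTIN_* code, comparison rtx code (or UNKNOWN), and function
   type.  The trailing _INT in these function types is the extra
   immediate that carries the embedded rounding / suppress-all-exceptions
   control.  A minimal user-level sketch, assuming the usual
   <immintrin.h> intrinsic and _MM_FROUND_* macro names:

       #include <immintrin.h>

       __m512d
       add_rne_sae (__m512d a, __m512d b)
       {
         return _mm512_add_round_pd (a, b,
                                     _MM_FROUND_TO_NEAREST_INT
                                     | _MM_FROUND_NO_EXC);
       }

   which is expected to reach IX86_BUILTIN_ADDPD512 below with the
   rounding immediate as its last argument.  */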
30456 static const struct builtin_description bdesc_round_args[] =
30457 {
30458 /* AVX512F */
30459 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30460 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30461 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmaddv2df3_round, "__builtin_ia32_addsd_round", IX86_BUILTIN_ADDSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30462 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmaddv4sf3_round, "__builtin_ia32_addss_round", IX86_BUILTIN_ADDSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30463 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) QI_FTYPE_V8DF_V8DF_INT_QI_INT },
30464 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) HI_FTYPE_V16SF_V16SF_INT_HI_INT },
30465 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv2df3_mask_round, "__builtin_ia32_cmpsd_mask", IX86_BUILTIN_CMPSD_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI_INT },
30466 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv4sf3_mask_round, "__builtin_ia32_cmpss_mask", IX86_BUILTIN_CMPSS_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI_INT },
30467 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_comi_round, "__builtin_ia32_vcomisd", IX86_BUILTIN_COMIDF, UNKNOWN, (int) INT_FTYPE_V2DF_V2DF_INT_INT },
30468 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_comi_round, "__builtin_ia32_vcomiss", IX86_BUILTIN_COMISF, UNKNOWN, (int) INT_FTYPE_V4SF_V4SF_INT_INT },
30469 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
30470 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30471 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT },
30472 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv8dfv8si_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30473 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT },
30474 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30475 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT },
30476 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30477 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT },
30478 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT },
30479 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_cvtsi2ss32", IX86_BUILTIN_CVTSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_INT },
30480 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT },
30481 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT },
30482 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30483 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30484 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30485 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30486 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
30487 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT },
30488 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT },
30489 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT },
30490 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30491 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30492 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmdivv2df3_round, "__builtin_ia32_divsd_round", IX86_BUILTIN_DIVSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30493 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmdivv4sf3_round, "__builtin_ia32_divss_round", IX86_BUILTIN_DIVSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30494 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
30495 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
30496 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
30497 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
30498 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_mask_round, "__builtin_ia32_fixupimmsd_mask", IX86_BUILTIN_FIXUPIMMSD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
30499 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_maskz_round, "__builtin_ia32_fixupimmsd_maskz", IX86_BUILTIN_FIXUPIMMSD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
30500 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_mask_round, "__builtin_ia32_fixupimmss_mask", IX86_BUILTIN_FIXUPIMMSS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
30501 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_maskz_round, "__builtin_ia32_fixupimmss_maskz", IX86_BUILTIN_FIXUPIMMSS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
30502 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30503 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30504 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv2df_round, "__builtin_ia32_getexpsd128_round", IX86_BUILTIN_GETEXPSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30505 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv4sf_round, "__builtin_ia32_getexpss128_round", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30506 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
30507 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
30508 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
30509 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
30510 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30511 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30512 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30513 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsmaxv4sf3_round, "__builtin_ia32_maxss_round", IX86_BUILTIN_MAXSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30514 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30515 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30516 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsminv2df3_round, "__builtin_ia32_minsd_round", IX86_BUILTIN_MINSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30517 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsminv4sf3_round, "__builtin_ia32_minss_round", IX86_BUILTIN_MINSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30518 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30519 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30520 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmmulv2df3_round, "__builtin_ia32_mulsd_round", IX86_BUILTIN_MULSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30521 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmmulv4sf3_round, "__builtin_ia32_mulss_round", IX86_BUILTIN_MULSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30522 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
30523 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
30524 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev2df_round, "__builtin_ia32_rndscalesd_round", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
30525 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev4sf_round, "__builtin_ia32_rndscaless_round", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
30526 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30527 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30528 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv2df_round, "__builtin_ia32_scalefsd_round", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30529 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv4sf_round, "__builtin_ia32_scalefss_round", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30530 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30531 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30532 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsqrtv2df2_round, "__builtin_ia32_sqrtsd_round", IX86_BUILTIN_SQRTSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30533 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsqrtv4sf2_round, "__builtin_ia32_sqrtss_round", IX86_BUILTIN_SQRTSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30534 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30535 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30536 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsubv2df3_round, "__builtin_ia32_subsd_round", IX86_BUILTIN_SUBSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30537 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsubv4sf3_round, "__builtin_ia32_subss_round", IX86_BUILTIN_SUBSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30538 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2si_round, "__builtin_ia32_vcvtsd2si32", IX86_BUILTIN_VCVTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
30539 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq_round, "__builtin_ia32_vcvtsd2si64", IX86_BUILTIN_VCVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
30540 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtsd2usi_round, "__builtin_ia32_vcvtsd2usi32", IX86_BUILTIN_VCVTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
30541 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtsd2usiq_round, "__builtin_ia32_vcvtsd2usi64", IX86_BUILTIN_VCVTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
30542 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtss2si_round, "__builtin_ia32_vcvtss2si32", IX86_BUILTIN_VCVTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
30543 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq_round, "__builtin_ia32_vcvtss2si64", IX86_BUILTIN_VCVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
30544 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtss2usi_round, "__builtin_ia32_vcvtss2usi32", IX86_BUILTIN_VCVTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
30545 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtss2usiq_round, "__builtin_ia32_vcvtss2usi64", IX86_BUILTIN_VCVTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
30546 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvttsd2si_round, "__builtin_ia32_vcvttsd2si32", IX86_BUILTIN_VCVTTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
30547 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq_round, "__builtin_ia32_vcvttsd2si64", IX86_BUILTIN_VCVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
30548 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttsd2usi_round, "__builtin_ia32_vcvttsd2usi32", IX86_BUILTIN_VCVTTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
30549 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttsd2usiq_round, "__builtin_ia32_vcvttsd2usi64", IX86_BUILTIN_VCVTTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
30550 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvttss2si_round, "__builtin_ia32_vcvttss2si32", IX86_BUILTIN_VCVTTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
30551 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq_round, "__builtin_ia32_vcvttss2si64", IX86_BUILTIN_VCVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
30552 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttss2usi_round, "__builtin_ia32_vcvttss2usi32", IX86_BUILTIN_VCVTTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
30553 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttss2usiq_round, "__builtin_ia32_vcvttss2usi64", IX86_BUILTIN_VCVTTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
30554 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30555 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30556 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30557 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30558 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30559 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30560 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v2df_round, "__builtin_ia32_vfmaddsd3_round", IX86_BUILTIN_VFMADDSD3_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_INT },
30561 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v4sf_round, "__builtin_ia32_vfmaddss3_round", IX86_BUILTIN_VFMADDSS3_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT },
30562 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30563 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30564 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30565 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30566 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30567 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30568 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30569 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30570 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30571 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30572 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30573 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30574 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30575 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30576 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30577 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30579 /* AVX512ER */
30580 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v8df_mask_round, "__builtin_ia32_exp2pd_mask", IX86_BUILTIN_EXP2PD_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30581 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf_mask_round, "__builtin_ia32_exp2ps_mask", IX86_BUILTIN_EXP2PS_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30582 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v8df_mask_round, "__builtin_ia32_rcp28pd_mask", IX86_BUILTIN_RCP28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30583 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v16sf_mask_round, "__builtin_ia32_rcp28ps_mask", IX86_BUILTIN_RCP28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30584 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v2df_round, "__builtin_ia32_rcp28sd_round", IX86_BUILTIN_RCP28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30585 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v4sf_round, "__builtin_ia32_rcp28ss_round", IX86_BUILTIN_RCP28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30586 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v8df_mask_round, "__builtin_ia32_rsqrt28pd_mask", IX86_BUILTIN_RSQRT28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30587 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v16sf_mask_round, "__builtin_ia32_rsqrt28ps_mask", IX86_BUILTIN_RSQRT28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30588 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v2df_round, "__builtin_ia32_rsqrt28sd_round", IX86_BUILTIN_RSQRT28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30589 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30590 };
30592 /* Builtins for MPX. */
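/* These descriptors carry (enum insn_code) 0 rather than a CODE_FOR_*
   pattern; the expectation is that ix86_expand_builtin handles the
   IX86_BUILTIN_BND* codes itself instead of going through the generic
   table-driven expansion.  A hedged source-level sketch, assuming an
   MPX-enabled compilation (e.g. -mmpx) and taking the argument order
   from the VOID_FTYPE_PCVOID_BND and BND_FTYPE_PCVOID_ULONG signatures
   below:

       char buf[16];
       __builtin_ia32_bndcl (buf, __builtin_ia32_bndmk (buf, sizeof buf));

   i.e. make bounds covering BUF, then check BUF against their lower
   bound.  */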
30593 static const struct builtin_description bdesc_mpx[] =
30594 {
30595 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndstx", IX86_BUILTIN_BNDSTX, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND_PCVOID },
30596 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcl", IX86_BUILTIN_BNDCL, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
30597 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcu", IX86_BUILTIN_BNDCU, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
30598 };
30600 /* Const builtins for MPX. */
30601 static const struct builtin_description bdesc_mpx_const[] =
30602 {
30603 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndmk", IX86_BUILTIN_BNDMK, UNKNOWN, (int) BND_FTYPE_PCVOID_ULONG },
30604 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndldx", IX86_BUILTIN_BNDLDX, UNKNOWN, (int) BND_FTYPE_PCVOID_PCVOID },
30605 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_narrow_bounds", IX86_BUILTIN_BNDNARROW, UNKNOWN, (int) PVOID_FTYPE_PCVOID_BND_ULONG },
30606 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndint", IX86_BUILTIN_BNDINT, UNKNOWN, (int) BND_FTYPE_BND_BND },
30607 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_sizeof", IX86_BUILTIN_SIZEOF, UNKNOWN, (int) ULONG_FTYPE_VOID },
30608 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndlower", IX86_BUILTIN_BNDLOWER, UNKNOWN, (int) PVOID_FTYPE_BND },
30609 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndupper", IX86_BUILTIN_BNDUPPER, UNKNOWN, (int) PVOID_FTYPE_BND },
30610 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndret", IX86_BUILTIN_BNDRET, UNKNOWN, (int) BND_FTYPE_PCVOID },
30611 };
30613 /* FMA4 and XOP. */
30614 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
30615 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
30616 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
30617 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
30618 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
30619 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
30620 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
30621 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
30622 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
30623 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
30624 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
30625 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
30626 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
30627 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
30628 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
30629 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
30630 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
30631 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
30632 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
30633 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
30634 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
30635 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
30636 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
30637 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
30638 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
30639 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
30640 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
30641 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
30642 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
30643 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
30644 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
30645 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
30646 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
30647 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
30648 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
30649 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
30650 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
30651 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
30652 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
30653 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
30654 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
30655 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
30656 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
30657 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
30658 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
30659 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
30660 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
30661 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
30662 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
30663 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
30664 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
30665 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
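/* Naming convention for the MULTI_ARG_* aliases above:
   MULTI_ARG_<n>_<elem> names a V*_FTYPE_* signature with <n> vector
   operands whose element type is <elem> (SF/DF float, QI/HI/SI/DI
   integer).  For the one-, two- and three-operand aliases a trailing 2
   selects the 256-bit form, a second element suffix marks a widening or
   mixed-type operand, _IMM an immediate count, and _CMP a comparison
   whose rtx code is supplied in the descriptor's fifth field (EQ, LT,
   ... in the vpcom* rows below) instead of UNKNOWN.  A short user-level
   sketch, illustrative only and assuming -mxop, using one
   MULTI_ARG_1_SF row (vfrczps) and one MULTI_ARG_3_SF row
   (vpcmov_v4sf) from the table below:

       typedef float v4sf __attribute__ ((vector_size (16)));

       v4sf
       frcz_then_select (v4sf a, v4sf b, v4sf c)
       {
         v4sf t = __builtin_ia32_vfrczps (a);
         return __builtin_ia32_vpcmov_v4sf (t, b, c);
       }  */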
30667 static const struct builtin_description bdesc_multi_arg[] =
30668 {
30669 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
30670 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
30671 UNKNOWN, (int)MULTI_ARG_3_SF },
30672 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
30673 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
30674 UNKNOWN, (int)MULTI_ARG_3_DF },
30676 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
30677 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
30678 UNKNOWN, (int)MULTI_ARG_3_SF },
30679 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
30680 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
30681 UNKNOWN, (int)MULTI_ARG_3_DF },
30683 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
30684 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
30685 UNKNOWN, (int)MULTI_ARG_3_SF },
30686 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
30687 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
30688 UNKNOWN, (int)MULTI_ARG_3_DF },
30689 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
30690 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
30691 UNKNOWN, (int)MULTI_ARG_3_SF2 },
30692 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
30693 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
30694 UNKNOWN, (int)MULTI_ARG_3_DF2 },
30696 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
30697 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
30698 UNKNOWN, (int)MULTI_ARG_3_SF },
30699 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
30700 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
30701 UNKNOWN, (int)MULTI_ARG_3_DF },
30702 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
30703 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
30704 UNKNOWN, (int)MULTI_ARG_3_SF2 },
30705 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
30706 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
30707 UNKNOWN, (int)MULTI_ARG_3_DF2 },
30709 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
30710 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
30711 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
30712 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
30713 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
30714 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
30715 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
30717 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
30718 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
30719 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
30720 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
30721 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
30722 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
30723 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
30725 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
30727 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
30728 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
30729 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30730 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30731 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
30732 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
30733 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30734 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30735 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30736 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30737 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30738 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30740 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30741 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
30742 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
30743 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
30744 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
30745 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
30746 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
30747 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
30748 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30749 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
30750 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
30751 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
30752 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30753 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
30754 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
30755 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
30757 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_1_SF },
30758 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_1_DF },
30759 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
30760 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
30761 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
30762 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
30764 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30765 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
30766 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
30767 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30768 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
30769 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30770 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30771 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
30772 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
30773 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30774 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
30775 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30776 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30777 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30778 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30780 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
30781 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
30782 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
30783 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
30784 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
30785 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
30786 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
30788 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
30789 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
30790 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
30791 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
30792 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
30793 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
30794 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
30796 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
30797 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
30798 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
30799 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
30800 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
30801 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
30802 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
30804 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
30805 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
30806 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
30807 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
30808 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
30809 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
30810 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
30812 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
30813 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
30814 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
30815 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
30816 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
30817 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
30818 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
30820 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
30821 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
30822 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
30823 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
30824 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
30825 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
30826 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
30828 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
30829 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
30830 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
30831 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
30832 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
30833 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
30834 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
30836 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
30837 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
30838 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
30839 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
30840 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
30841 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
30842 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
30844 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
30845 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
30846 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
30847 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
30848 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
30849 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
30850 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
30851 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
30853 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
30854 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
30855 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
30856 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
30857 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
30858 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
30859 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
30860 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
30862 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
30863 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
30864 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
30865 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
30869 /* TM vector builtins. */
30871 /* Reuse the existing x86-specific `struct builtin_description' because
30872 we're lazy. Add casts to make them fit. */
30873 static const struct builtin_description bdesc_tm[] =
30875 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30876 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30877 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30878 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30879 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30880 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30881 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30883 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30884 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30885 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30886 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30887 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30888 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30889 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30891 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30892 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30893 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30894 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30895 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30896 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30897 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30899 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
30900 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
30901 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
30904 /* TM callbacks. */
30906 /* Return the builtin decl needed to load a vector of TYPE. */
30908 static tree
30909 ix86_builtin_tm_load (tree type)
30911 if (TREE_CODE (type) == VECTOR_TYPE)
30913 switch (tree_to_uhwi (TYPE_SIZE (type)))
30915 case 64:
30916 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
30917 case 128:
30918 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
30919 case 256:
30920 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
30923 return NULL_TREE;
30926 /* Return the builtin decl needed to store a vector of TYPE. */
30928 static tree
30929 ix86_builtin_tm_store (tree type)
30931 if (TREE_CODE (type) == VECTOR_TYPE)
30933 switch (tree_to_uhwi (TYPE_SIZE (type)))
30935 case 64:
30936 return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
30937 case 128:
30938 return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
30939 case 256:
30940 return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
30943 return NULL_TREE;
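/* Illustrative sketch (not from this file; assumes -fgnu-tm and SSE enabled):
   the trans-mem pass consults the two hooks above for vector accesses made
   inside a transaction, so a 128-bit vector copy is routed through the
   _ITM_RM128/_ITM_WM128 entries registered in bdesc_tm.  */
#if 0
#include <xmmintrin.h>
__m128 src, dst;
void
copy_in_txn (void)
{
  __transaction_atomic
    {
      dst = src;   /* load via BUILT_IN_TM_LOAD_M128,
                      store via BUILT_IN_TM_STORE_M128.  */
    }
}
#endif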
30946 /* Initialize the transactional memory vector load/store builtins. */
30948 static void
30949 ix86_init_tm_builtins (void)
30951 enum ix86_builtin_func_type ftype;
30952 const struct builtin_description *d;
30953 size_t i;
30954 tree decl;
30955 tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
30956 tree attrs_log, attrs_type_log;
30958 if (!flag_tm)
30959 return;
30961 /* If there are no builtins defined, we must be compiling in a
30962 language without trans-mem support. */
30963 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
30964 return;
30966 /* Use whatever attributes a normal TM load has. */
30967 decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
30968 attrs_load = DECL_ATTRIBUTES (decl);
30969 attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30970 /* Use whatever attributes a normal TM store has. */
30971 decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
30972 attrs_store = DECL_ATTRIBUTES (decl);
30973 attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30974 /* Use whatever attributes a normal TM log has. */
30975 decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
30976 attrs_log = DECL_ATTRIBUTES (decl);
30977 attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30979 for (i = 0, d = bdesc_tm;
30980 i < ARRAY_SIZE (bdesc_tm);
30981 i++, d++)
30983 if ((d->mask & ix86_isa_flags) != 0
30984 || (lang_hooks.builtin_function
30985 == lang_hooks.builtin_function_ext_scope))
30987 tree type, attrs, attrs_type;
30988 enum built_in_function code = (enum built_in_function) d->code;
30990 ftype = (enum ix86_builtin_func_type) d->flag;
30991 type = ix86_get_builtin_func_type (ftype);
30993 if (BUILTIN_TM_LOAD_P (code))
30995 attrs = attrs_load;
30996 attrs_type = attrs_type_load;
30998 else if (BUILTIN_TM_STORE_P (code))
31000 attrs = attrs_store;
31001 attrs_type = attrs_type_store;
31003 else
31005 attrs = attrs_log;
31006 attrs_type = attrs_type_log;
31008 decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
31009 /* The builtin without the prefix for
31010 calling it directly. */
31011 d->name + strlen ("__builtin_"),
31012 attrs);
31013 /* add_builtin_function() will set the DECL_ATTRIBUTES, now
31014 set the TYPE_ATTRIBUTES. */
31015 decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
31017 set_builtin_decl (code, decl, false);
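/* Worked example of the registration above (a sketch; the test program is
   illustrative).  The decl is registered under d->name, and the library name
   passed to add_builtin_function is d->name with the "__builtin_" prefix
   stripped, which per the comment above is the spelling used for calling the
   routine directly.  */
#if 0
#include <assert.h>
#include <string.h>
int
main (void)
{
  const char *name = "__builtin__ITM_WM128";
  /* This is exactly the suffix passed as the library name above.  */
  assert (strcmp (name + strlen ("__builtin_"), "_ITM_WM128") == 0);
  return 0;
}
#endif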
31022 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
31023 in the current target ISA, to allow the user to compile particular modules
31024 with target-specific options that differ from the command-line
31025 options. */
31026 static void
31027 ix86_init_mmx_sse_builtins (void)
31029 const struct builtin_description * d;
31030 enum ix86_builtin_func_type ftype;
31031 size_t i;
31033 /* Add all special builtins with variable number of operands. */
31034 for (i = 0, d = bdesc_special_args;
31035 i < ARRAY_SIZE (bdesc_special_args);
31036 i++, d++)
31038 if (d->name == 0)
31039 continue;
31041 ftype = (enum ix86_builtin_func_type) d->flag;
31042 def_builtin (d->mask, d->name, ftype, d->code);
31045 /* Add all builtins with variable number of operands. */
31046 for (i = 0, d = bdesc_args;
31047 i < ARRAY_SIZE (bdesc_args);
31048 i++, d++)
31050 if (d->name == 0)
31051 continue;
31053 ftype = (enum ix86_builtin_func_type) d->flag;
31054 def_builtin_const (d->mask, d->name, ftype, d->code);
31057 /* Add all builtins with rounding. */
31058 for (i = 0, d = bdesc_round_args;
31059 i < ARRAY_SIZE (bdesc_round_args);
31060 i++, d++)
31062 if (d->name == 0)
31063 continue;
31065 ftype = (enum ix86_builtin_func_type) d->flag;
31066 def_builtin_const (d->mask, d->name, ftype, d->code);
31069 /* pcmpestr[im] insns. */
31070 for (i = 0, d = bdesc_pcmpestr;
31071 i < ARRAY_SIZE (bdesc_pcmpestr);
31072 i++, d++)
31074 if (d->code == IX86_BUILTIN_PCMPESTRM128)
31075 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
31076 else
31077 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
31078 def_builtin_const (d->mask, d->name, ftype, d->code);
31081 /* pcmpistr[im] insns. */
31082 for (i = 0, d = bdesc_pcmpistr;
31083 i < ARRAY_SIZE (bdesc_pcmpistr);
31084 i++, d++)
31086 if (d->code == IX86_BUILTIN_PCMPISTRM128)
31087 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
31088 else
31089 ftype = INT_FTYPE_V16QI_V16QI_INT;
31090 def_builtin_const (d->mask, d->name, ftype, d->code);
31093 /* comi/ucomi insns. */
31094 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
31096 if (d->mask == OPTION_MASK_ISA_SSE2)
31097 ftype = INT_FTYPE_V2DF_V2DF;
31098 else
31099 ftype = INT_FTYPE_V4SF_V4SF;
31100 def_builtin_const (d->mask, d->name, ftype, d->code);
31103 /* SSE */
31104 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
31105 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
31106 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
31107 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
31109 /* SSE or 3DNow!A */
31110 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
31111 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
31112 IX86_BUILTIN_MASKMOVQ);
31114 /* SSE2 */
31115 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
31116 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
31118 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
31119 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
31120 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
31121 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
31123 /* SSE3. */
31124 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
31125 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
31126 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
31127 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
31129 /* AES */
31130 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
31131 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
31132 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
31133 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
31134 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
31135 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
31136 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
31137 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
31138 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
31139 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
31140 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
31141 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
31143 /* PCLMUL */
31144 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
31145 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
31147 /* RDRND */
31148 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
31149 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
31150 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
31151 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
31152 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
31153 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
31154 IX86_BUILTIN_RDRAND64_STEP);
31156 /* AVX2 */
31157 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
31158 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
31159 IX86_BUILTIN_GATHERSIV2DF);
31161 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
31162 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
31163 IX86_BUILTIN_GATHERSIV4DF);
31165 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
31166 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
31167 IX86_BUILTIN_GATHERDIV2DF);
31169 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
31170 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
31171 IX86_BUILTIN_GATHERDIV4DF);
31173 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
31174 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
31175 IX86_BUILTIN_GATHERSIV4SF);
31177 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
31178 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
31179 IX86_BUILTIN_GATHERSIV8SF);
31181 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
31182 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
31183 IX86_BUILTIN_GATHERDIV4SF);
31185 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
31186 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
31187 IX86_BUILTIN_GATHERDIV8SF);
31189 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
31190 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
31191 IX86_BUILTIN_GATHERSIV2DI);
31193 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
31194 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
31195 IX86_BUILTIN_GATHERSIV4DI);
31197 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
31198 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
31199 IX86_BUILTIN_GATHERDIV2DI);
31201 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
31202 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
31203 IX86_BUILTIN_GATHERDIV4DI);
31205 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
31206 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
31207 IX86_BUILTIN_GATHERSIV4SI);
31209 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
31210 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
31211 IX86_BUILTIN_GATHERSIV8SI);
31213 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
31214 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
31215 IX86_BUILTIN_GATHERDIV4SI);
31217 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
31218 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
31219 IX86_BUILTIN_GATHERDIV8SI);
31221 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
31222 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
31223 IX86_BUILTIN_GATHERALTSIV4DF);
31225 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
31226 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
31227 IX86_BUILTIN_GATHERALTDIV8SF);
31229 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
31230 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
31231 IX86_BUILTIN_GATHERALTSIV4DI);
31233 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
31234 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
31235 IX86_BUILTIN_GATHERALTDIV8SI);
31237 /* AVX512F */
31238 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf",
31239 V16SF_FTYPE_V16SF_PCFLOAT_V16SI_HI_INT,
31240 IX86_BUILTIN_GATHER3SIV16SF);
31242 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df",
31243 V8DF_FTYPE_V8DF_PCDOUBLE_V8SI_QI_INT,
31244 IX86_BUILTIN_GATHER3SIV8DF);
31246 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf",
31247 V8SF_FTYPE_V8SF_PCFLOAT_V8DI_QI_INT,
31248 IX86_BUILTIN_GATHER3DIV16SF);
31250 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df",
31251 V8DF_FTYPE_V8DF_PCDOUBLE_V8DI_QI_INT,
31252 IX86_BUILTIN_GATHER3DIV8DF);
31254 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si",
31255 V16SI_FTYPE_V16SI_PCINT_V16SI_HI_INT,
31256 IX86_BUILTIN_GATHER3SIV16SI);
31258 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di",
31259 V8DI_FTYPE_V8DI_PCINT64_V8SI_QI_INT,
31260 IX86_BUILTIN_GATHER3SIV8DI);
31262 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si",
31263 V8SI_FTYPE_V8SI_PCINT_V8DI_QI_INT,
31264 IX86_BUILTIN_GATHER3DIV16SI);
31266 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di",
31267 V8DI_FTYPE_V8DI_PCINT64_V8DI_QI_INT,
31268 IX86_BUILTIN_GATHER3DIV8DI);
31270 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df ",
31271 V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
31272 IX86_BUILTIN_GATHER3ALTSIV8DF);
31274 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf ",
31275 V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
31276 IX86_BUILTIN_GATHER3ALTDIV16SF);
31278 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di ",
31279 V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
31280 IX86_BUILTIN_GATHER3ALTSIV8DI);
31282 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si ",
31283 V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
31284 IX86_BUILTIN_GATHER3ALTDIV16SI);
31286 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf",
31287 VOID_FTYPE_PFLOAT_HI_V16SI_V16SF_INT,
31288 IX86_BUILTIN_SCATTERSIV16SF);
31290 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df",
31291 VOID_FTYPE_PDOUBLE_QI_V8SI_V8DF_INT,
31292 IX86_BUILTIN_SCATTERSIV8DF);
31294 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf",
31295 VOID_FTYPE_PFLOAT_QI_V8DI_V8SF_INT,
31296 IX86_BUILTIN_SCATTERDIV16SF);
31298 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df",
31299 VOID_FTYPE_PDOUBLE_QI_V8DI_V8DF_INT,
31300 IX86_BUILTIN_SCATTERDIV8DF);
31302 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si",
31303 VOID_FTYPE_PINT_HI_V16SI_V16SI_INT,
31304 IX86_BUILTIN_SCATTERSIV16SI);
31306 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di",
31307 VOID_FTYPE_PLONGLONG_QI_V8SI_V8DI_INT,
31308 IX86_BUILTIN_SCATTERSIV8DI);
31310 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si",
31311 VOID_FTYPE_PINT_QI_V8DI_V8SI_INT,
31312 IX86_BUILTIN_SCATTERDIV16SI);
31314 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8di",
31315 VOID_FTYPE_PLONGLONG_QI_V8DI_V8DI_INT,
31316 IX86_BUILTIN_SCATTERDIV8DI);
31318 /* AVX512PF */
31319 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
31320 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
31321 IX86_BUILTIN_GATHERPFDPD);
31322 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
31323 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
31324 IX86_BUILTIN_GATHERPFDPS);
31325 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqpd",
31326 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
31327 IX86_BUILTIN_GATHERPFQPD);
31328 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
31329 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
31330 IX86_BUILTIN_GATHERPFQPS);
31331 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdpd",
31332 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
31333 IX86_BUILTIN_SCATTERPFDPD);
31334 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
31335 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
31336 IX86_BUILTIN_SCATTERPFDPS);
31337 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqpd",
31338 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
31339 IX86_BUILTIN_SCATTERPFQPD);
31340 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
31341 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
31342 IX86_BUILTIN_SCATTERPFQPS);
31344 /* SHA */
31345 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1",
31346 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
31347 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2",
31348 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
31349 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1nexte",
31350 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE);
31351 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1rnds4",
31352 V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4);
31353 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg1",
31354 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1);
31355 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg2",
31356 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2);
31357 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256rnds2",
31358 V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2);
31360 /* RTM. */
31361 def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
31362 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
31364 /* MMX access to the vec_init patterns. */
31365 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
31366 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
31368 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
31369 V4HI_FTYPE_HI_HI_HI_HI,
31370 IX86_BUILTIN_VEC_INIT_V4HI);
31372 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
31373 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
31374 IX86_BUILTIN_VEC_INIT_V8QI);
31376 /* Access to the vec_extract patterns. */
31377 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
31378 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
31379 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
31380 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
31381 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
31382 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
31383 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
31384 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
31385 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
31386 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
31388 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
31389 "__builtin_ia32_vec_ext_v4hi",
31390 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
31392 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
31393 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
31395 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
31396 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
31398 /* Access to the vec_set patterns. */
31399 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
31400 "__builtin_ia32_vec_set_v2di",
31401 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
31403 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
31404 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
31406 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
31407 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
31409 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
31410 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
31412 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
31413 "__builtin_ia32_vec_set_v4hi",
31414 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
31416 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
31417 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
31419 /* RDSEED */
31420 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
31421 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
31422 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
31423 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
31424 def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
31425 "__builtin_ia32_rdseed_di_step",
31426 INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
31428 /* ADCX */
31429 def_builtin (0, "__builtin_ia32_addcarryx_u32",
31430 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
31431 def_builtin (OPTION_MASK_ISA_64BIT,
31432 "__builtin_ia32_addcarryx_u64",
31433 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
31434 IX86_BUILTIN_ADDCARRYX64);
31436 /* Read/write FLAGS. */
31437 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u32",
31438 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
31439 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u64",
31440 UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
31441 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u32",
31442 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS);
31443 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u64",
31444 VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS);
31447 /* Add FMA4 and XOP multi-argument instructions. */
31448 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
31450 if (d->name == 0)
31451 continue;
31453 ftype = (enum ix86_builtin_func_type) d->flag;
31454 def_builtin_const (d->mask, d->name, ftype, d->code);
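/* Illustrative sketch of what the unconditional registration above enables
   (an assumption about typical use, not part of this file): even when the
   translation unit is compiled with a plain -msse2 baseline, a single
   function can opt into AVX2 via the target attribute and use its
   intrinsics, because the corresponding builtins are defined regardless of
   the command-line ISA.  */
#if 0
#include <immintrin.h>
__attribute__ ((target ("avx2")))
__m256i
add8x32 (__m256i a, __m256i b)
{
  return _mm256_add_epi32 (a, b);   /* expands to an AVX2 builtin */
}
#endif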
31458 static void
31459 ix86_init_mpx_builtins ()
31461 const struct builtin_description * d;
31462 enum ix86_builtin_func_type ftype;
31463 tree decl;
31464 size_t i;
31466 for (i = 0, d = bdesc_mpx;
31467 i < ARRAY_SIZE (bdesc_mpx);
31468 i++, d++)
31470 if (d->name == 0)
31471 continue;
31473 ftype = (enum ix86_builtin_func_type) d->flag;
31474 decl = def_builtin (d->mask, d->name, ftype, d->code);
31476 /* Without the leaf and nothrow flags, abnormal edges may
31477 follow calls to MPX builtins when setjmp is present in
31478 the function. Since we may have many MPX builtin calls,
31479 this causes lots of useless edges and enormous PHI
31480 nodes. To avoid this we mark MPX builtins as leaf
31481 and nothrow. */
31482 if (decl)
31484 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
31485 NULL_TREE);
31486 TREE_NOTHROW (decl) = 1;
31488 else
31490 ix86_builtins_isa[(int)d->code].leaf_p = true;
31491 ix86_builtins_isa[(int)d->code].nothrow_p = true;
31495 for (i = 0, d = bdesc_mpx_const;
31496 i < ARRAY_SIZE (bdesc_mpx_const);
31497 i++, d++)
31499 if (d->name == 0)
31500 continue;
31502 ftype = (enum ix86_builtin_func_type) d->flag;
31503 decl = def_builtin_const (d->mask, d->name, ftype, d->code);
31505 if (decl)
31507 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
31508 NULL_TREE);
31509 TREE_NOTHROW (decl) = 1;
31511 else
31513 ix86_builtins_isa[(int)d->code].leaf_p = true;
31514 ix86_builtins_isa[(int)d->code].nothrow_p = true;
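/* Sketch of the situation the leaf/nothrow marking above avoids (an
   assumption about typical use; requires pointer-bounds checking to be
   enabled): in a function that also calls setjmp, every instrumented access
   would otherwise get an abnormal edge to its bounds-check call, bloating
   the CFG and its PHI nodes.  */
#if 0
#include <setjmp.h>
extern jmp_buf env;
int
checked_sum (int *p, int n)
{
  int s = 0, i;
  if (setjmp (env))
    return -1;
  for (i = 0; i < n; i++)
    s += p[i];   /* each access may be instrumented with MPX
                    bounds-check builtins defined above.  */
  return s;
}
#endif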
31519 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
31520 to return a pointer to VERSION_DECL if the outcome of the expression
31521 formed by PREDICATE_CHAIN is true. This function will be called during
31522 version dispatch to decide which function version to execute. It returns
31523 the basic block at the end, to which more conditions can be added. */
31525 static basic_block
31526 add_condition_to_bb (tree function_decl, tree version_decl,
31527 tree predicate_chain, basic_block new_bb)
31529 gimple return_stmt;
31530 tree convert_expr, result_var;
31531 gimple convert_stmt;
31532 gimple call_cond_stmt;
31533 gimple if_else_stmt;
31535 basic_block bb1, bb2, bb3;
31536 edge e12, e23;
31538 tree cond_var, and_expr_var = NULL_TREE;
31539 gimple_seq gseq;
31541 tree predicate_decl, predicate_arg;
31543 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
31545 gcc_assert (new_bb != NULL);
31546 gseq = bb_seq (new_bb);
31549 convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
31550 build_fold_addr_expr (version_decl));
31551 result_var = create_tmp_var (ptr_type_node, NULL);
31552 convert_stmt = gimple_build_assign (result_var, convert_expr);
31553 return_stmt = gimple_build_return (result_var);
31555 if (predicate_chain == NULL_TREE)
31557 gimple_seq_add_stmt (&gseq, convert_stmt);
31558 gimple_seq_add_stmt (&gseq, return_stmt);
31559 set_bb_seq (new_bb, gseq);
31560 gimple_set_bb (convert_stmt, new_bb);
31561 gimple_set_bb (return_stmt, new_bb);
31562 pop_cfun ();
31563 return new_bb;
31566 while (predicate_chain != NULL)
31568 cond_var = create_tmp_var (integer_type_node, NULL);
31569 predicate_decl = TREE_PURPOSE (predicate_chain);
31570 predicate_arg = TREE_VALUE (predicate_chain);
31571 call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
31572 gimple_call_set_lhs (call_cond_stmt, cond_var);
31574 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
31575 gimple_set_bb (call_cond_stmt, new_bb);
31576 gimple_seq_add_stmt (&gseq, call_cond_stmt);
31578 predicate_chain = TREE_CHAIN (predicate_chain);
31580 if (and_expr_var == NULL)
31581 and_expr_var = cond_var;
31582 else
31584 gimple assign_stmt;
31585 /* Use MIN_EXPR to check whether any integer is zero:
31586 and_expr_var = min_expr <cond_var, and_expr_var>. */
31587 assign_stmt = gimple_build_assign (and_expr_var,
31588 build2 (MIN_EXPR, integer_type_node,
31589 cond_var, and_expr_var));
31591 gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
31592 gimple_set_bb (assign_stmt, new_bb);
31593 gimple_seq_add_stmt (&gseq, assign_stmt);
31597 if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
31598 integer_zero_node,
31599 NULL_TREE, NULL_TREE);
31600 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
31601 gimple_set_bb (if_else_stmt, new_bb);
31602 gimple_seq_add_stmt (&gseq, if_else_stmt);
31604 gimple_seq_add_stmt (&gseq, convert_stmt);
31605 gimple_seq_add_stmt (&gseq, return_stmt);
31606 set_bb_seq (new_bb, gseq);
31608 bb1 = new_bb;
31609 e12 = split_block (bb1, if_else_stmt);
31610 bb2 = e12->dest;
31611 e12->flags &= ~EDGE_FALLTHRU;
31612 e12->flags |= EDGE_TRUE_VALUE;
31614 e23 = split_block (bb2, return_stmt);
31616 gimple_set_bb (convert_stmt, bb2);
31617 gimple_set_bb (return_stmt, bb2);
31619 bb3 = e23->dest;
31620 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
31622 remove_edge (e23);
31623 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
31625 pop_cfun ();
31627 return bb3;
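/* Rough C rendering of what a single call to add_condition_to_bb appends to
   the resolver (a sketch only; the real statements are GIMPLE and the symbols
   below are illustrative).  MIN_EXPR acts as a logical AND over the predicate
   results, and the false edge falls through to whatever condition is added
   next.  */
#if 0
extern int some_version (void);
void *
resolver_fragment (void)
{
  int c1 = __builtin_cpu_is ("core2");        /* predicate_decl (predicate_arg) */
  int c2 = __builtin_cpu_supports ("avx");
  int all = c2 < c1 ? c2 : c1;                /* MIN_EXPR of the two results */
  if (all > 0)
    return (void *) some_version;             /* convert_stmt + return_stmt */
  return 0;                                   /* in reality: the next condition */
}
#endif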
31630 /* This parses the attribute arguments to target in DECL and determines
31631 the right builtin to use to match the platform specification.
31632 It returns the priority value for this version decl. If PREDICATE_LIST
31633 is not NULL, it stores the list of cpu features that need to be checked
31634 before dispatching this function. */
31636 static unsigned int
31637 get_builtin_code_for_version (tree decl, tree *predicate_list)
31639 tree attrs;
31640 struct cl_target_option cur_target;
31641 tree target_node;
31642 struct cl_target_option *new_target;
31643 const char *arg_str = NULL;
31644 const char *attrs_str = NULL;
31645 char *tok_str = NULL;
31646 char *token;
31648 /* Priority of i386 features, greater value is higher priority. This is
31649 used to decide the order in which function dispatch must happen. For
31650 instance, a version specialized for SSE4.2 should be checked for dispatch
31651 before a version for SSE3, as SSE4.2 implies SSE3. */
31652 enum feature_priority
31654 P_ZERO = 0,
31655 P_MMX,
31656 P_SSE,
31657 P_SSE2,
31658 P_SSE3,
31659 P_SSSE3,
31660 P_PROC_SSSE3,
31661 P_SSE4_A,
31662 P_PROC_SSE4_A,
31663 P_SSE4_1,
31664 P_SSE4_2,
31665 P_PROC_SSE4_2,
31666 P_POPCNT,
31667 P_AVX,
31668 P_PROC_AVX,
31669 P_FMA4,
31670 P_XOP,
31671 P_PROC_XOP,
31672 P_FMA,
31673 P_PROC_FMA,
31674 P_AVX2,
31675 P_PROC_AVX2
31678 enum feature_priority priority = P_ZERO;
31680 /* These are the target attribute strings for which a dispatcher is
31681 available, from fold_builtin_cpu. */
31683 static struct _feature_list
31685 const char *const name;
31686 const enum feature_priority priority;
31688 const feature_list[] =
31690 {"mmx", P_MMX},
31691 {"sse", P_SSE},
31692 {"sse2", P_SSE2},
31693 {"sse3", P_SSE3},
31694 {"sse4a", P_SSE4_A},
31695 {"ssse3", P_SSSE3},
31696 {"sse4.1", P_SSE4_1},
31697 {"sse4.2", P_SSE4_2},
31698 {"popcnt", P_POPCNT},
31699 {"avx", P_AVX},
31700 {"fma4", P_FMA4},
31701 {"xop", P_XOP},
31702 {"fma", P_FMA},
31703 {"avx2", P_AVX2}
31707 static unsigned int NUM_FEATURES
31708 = sizeof (feature_list) / sizeof (struct _feature_list);
31710 unsigned int i;
31712 tree predicate_chain = NULL_TREE;
31713 tree predicate_decl, predicate_arg;
31715 attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
31716 gcc_assert (attrs != NULL);
31718 attrs = TREE_VALUE (TREE_VALUE (attrs));
31720 gcc_assert (TREE_CODE (attrs) == STRING_CST);
31721 attrs_str = TREE_STRING_POINTER (attrs);
31723 /* Return priority zero for default function. */
31724 if (strcmp (attrs_str, "default") == 0)
31725 return 0;
31727 /* Handle arch= if specified. For priority, set it to be 1 more than
31728 the best instruction set the processor can handle. For instance, if
31729 there is a version for atom and a version for ssse3 (the highest ISA
31730 priority for atom), the atom version must be checked for dispatch
31731 before the ssse3 version. */
31732 if (strstr (attrs_str, "arch=") != NULL)
31734 cl_target_option_save (&cur_target, &global_options);
31735 target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
31736 &global_options_set);
31738 gcc_assert (target_node);
31739 new_target = TREE_TARGET_OPTION (target_node);
31740 gcc_assert (new_target);
31742 if (new_target->arch_specified && new_target->arch > 0)
31744 switch (new_target->arch)
31746 case PROCESSOR_CORE2:
31747 arg_str = "core2";
31748 priority = P_PROC_SSSE3;
31749 break;
31750 case PROCESSOR_NEHALEM:
31751 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES)
31752 arg_str = "westmere";
31753 else
31754 /* We translate "arch=corei7" and "arch=nehalem" to
31755 "corei7" so that it will be mapped to M_INTEL_COREI7
31756 as cpu type to cover all M_INTEL_COREI7_XXXs. */
31757 arg_str = "corei7";
31758 priority = P_PROC_SSE4_2;
31759 break;
31760 case PROCESSOR_SANDYBRIDGE:
31761 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C)
31762 arg_str = "ivybridge";
31763 else
31764 arg_str = "sandybridge";
31765 priority = P_PROC_AVX;
31766 break;
31767 case PROCESSOR_HASWELL:
31768 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX)
31769 arg_str = "broadwell";
31770 else
31771 arg_str = "haswell";
31772 priority = P_PROC_AVX2;
31773 break;
31774 case PROCESSOR_BONNELL:
31775 arg_str = "bonnell";
31776 priority = P_PROC_SSSE3;
31777 break;
31778 case PROCESSOR_SILVERMONT:
31779 arg_str = "silvermont";
31780 priority = P_PROC_SSE4_2;
31781 break;
31782 case PROCESSOR_AMDFAM10:
31783 arg_str = "amdfam10h";
31784 priority = P_PROC_SSE4_A;
31785 break;
31786 case PROCESSOR_BTVER1:
31787 arg_str = "btver1";
31788 priority = P_PROC_SSE4_A;
31789 break;
31790 case PROCESSOR_BTVER2:
31791 arg_str = "btver2";
31792 priority = P_PROC_AVX;
31793 break;
31794 case PROCESSOR_BDVER1:
31795 arg_str = "bdver1";
31796 priority = P_PROC_XOP;
31797 break;
31798 case PROCESSOR_BDVER2:
31799 arg_str = "bdver2";
31800 priority = P_PROC_FMA;
31801 break;
31802 case PROCESSOR_BDVER3:
31803 arg_str = "bdver3";
31804 priority = P_PROC_FMA;
31805 break;
31806 case PROCESSOR_BDVER4:
31807 arg_str = "bdver4";
31808 priority = P_PROC_AVX2;
31809 break;
31813 cl_target_option_restore (&global_options, &cur_target);
31815 if (predicate_list && arg_str == NULL)
31817 error_at (DECL_SOURCE_LOCATION (decl),
31818 "No dispatcher found for the versioning attributes");
31819 return 0;
31822 if (predicate_list)
31824 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
31825 /* For a C string literal the length includes the trailing NULL. */
31826 predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
31827 predicate_chain = tree_cons (predicate_decl, predicate_arg,
31828 predicate_chain);
31832 /* Process feature name. */
31833 tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
31834 strcpy (tok_str, attrs_str);
31835 token = strtok (tok_str, ",");
31836 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];
31838 while (token != NULL)
31840 /* Do not process "arch=" */
31841 if (strncmp (token, "arch=", 5) == 0)
31843 token = strtok (NULL, ",");
31844 continue;
31846 for (i = 0; i < NUM_FEATURES; ++i)
31848 if (strcmp (token, feature_list[i].name) == 0)
31850 if (predicate_list)
31852 predicate_arg = build_string_literal (
31853 strlen (feature_list[i].name) + 1,
31854 feature_list[i].name);
31855 predicate_chain = tree_cons (predicate_decl, predicate_arg,
31856 predicate_chain);
31858 /* Find the maximum priority feature. */
31859 if (feature_list[i].priority > priority)
31860 priority = feature_list[i].priority;
31862 break;
31865 if (predicate_list && i == NUM_FEATURES)
31867 error_at (DECL_SOURCE_LOCATION (decl),
31868 "No dispatcher found for %s", token);
31869 return 0;
31871 token = strtok (NULL, ",");
31873 free (tok_str);
31875 if (predicate_list && predicate_chain == NULL_TREE)
31877 error_at (DECL_SOURCE_LOCATION (decl),
31878 "No dispatcher found for the versioning attributes : %s",
31879 attrs_str);
31880 return 0;
31882 else if (predicate_list)
31884 predicate_chain = nreverse (predicate_chain);
31885 *predicate_list = predicate_chain;
31888 return priority;
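/* Worked example (a sketch derived from the code above): for a version
   declared with __attribute__ ((target ("arch=core2,avx"))) the predicate
   list built here corresponds to
       __builtin_cpu_is ("core2") && __builtin_cpu_supports ("avx")
   and the returned priority is the larger of P_PROC_SSSE3 (from the core2
   arch) and P_AVX (from the avx feature), i.e. P_AVX.  */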
31891 /* This compares the priority of target features in function DECL1
31892 and DECL2. It returns positive value if DECL1 is higher priority,
31893 negative value if DECL2 is higher priority and 0 if they are the
31894 same. */
31896 static int
31897 ix86_compare_version_priority (tree decl1, tree decl2)
31899 unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
31900 unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
31902 return (int)priority1 - (int)priority2;
31905 /* V1 and V2 point to function versions with different priorities
31906 based on the target ISA. This function compares their priorities. */
31908 static int
31909 feature_compare (const void *v1, const void *v2)
31911 typedef struct _function_version_info
31913 tree version_decl;
31914 tree predicate_chain;
31915 unsigned int dispatch_priority;
31916 } function_version_info;
31918 const function_version_info c1 = *(const function_version_info *)v1;
31919 const function_version_info c2 = *(const function_version_info *)v2;
31920 return (c2.dispatch_priority - c1.dispatch_priority);
31923 /* This function generates the dispatch function for
31924 multi-versioned functions. DISPATCH_DECL is the function which will
31925 contain the dispatch logic. FNDECLS holds the function choices for
31926 dispatch (it is passed as a vector). EMPTY_BB is the basic block pointer
31927 in DISPATCH_DECL in which the dispatch code is generated. */
31929 static int
31930 dispatch_function_versions (tree dispatch_decl,
31931 void *fndecls_p,
31932 basic_block *empty_bb)
31934 tree default_decl;
31935 gimple ifunc_cpu_init_stmt;
31936 gimple_seq gseq;
31937 int ix;
31938 tree ele;
31939 vec<tree> *fndecls;
31940 unsigned int num_versions = 0;
31941 unsigned int actual_versions = 0;
31942 unsigned int i;
31944 struct _function_version_info
31946 tree version_decl;
31947 tree predicate_chain;
31948 unsigned int dispatch_priority;
31949 }*function_version_info;
31951 gcc_assert (dispatch_decl != NULL
31952 && fndecls_p != NULL
31953 && empty_bb != NULL);
31955 /* fndecls_p is actually a vector. */
31956 fndecls = static_cast<vec<tree> *> (fndecls_p);
31958 /* At least one more version other than the default. */
31959 num_versions = fndecls->length ();
31960 gcc_assert (num_versions >= 2);
31962 function_version_info = (struct _function_version_info *)
31963 XNEWVEC (struct _function_version_info, (num_versions - 1));
31965 /* The first version in the vector is the default decl. */
31966 default_decl = (*fndecls)[0];
31968 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
31970 gseq = bb_seq (*empty_bb);
31971 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
31972 constructors, so explicitly call __builtin_cpu_init here. */
31973 ifunc_cpu_init_stmt = gimple_build_call_vec (
31974 ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
31975 gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
31976 gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
31977 set_bb_seq (*empty_bb, gseq);
31979 pop_cfun ();
31982 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
31984 tree version_decl = ele;
31985 tree predicate_chain = NULL_TREE;
31986 unsigned int priority;
31987 /* Get attribute string, parse it and find the right predicate decl.
31988 The predicate function could be a lengthy combination of many
31989 features, like arch-type and various isa-variants. */
31990 priority = get_builtin_code_for_version (version_decl,
31991 &predicate_chain);
31993 if (predicate_chain == NULL_TREE)
31994 continue;
31996 function_version_info [actual_versions].version_decl = version_decl;
31997 function_version_info [actual_versions].predicate_chain
31998 = predicate_chain;
31999 function_version_info [actual_versions].dispatch_priority = priority;
32000 actual_versions++;
32003 /* Sort the versions according to descending order of dispatch priority. The
32004 priority is based on the ISA. This is not a perfect solution. There
32005 could still be ambiguity. If more than one function version is suitable
32006 to execute, which one should be dispatched? In future, allow the user
32007 to specify a dispatch priority next to the version. */
32008 qsort (function_version_info, actual_versions,
32009 sizeof (struct _function_version_info), feature_compare);
32011 for (i = 0; i < actual_versions; ++i)
32012 *empty_bb = add_condition_to_bb (dispatch_decl,
32013 function_version_info[i].version_decl,
32014 function_version_info[i].predicate_chain,
32015 *empty_bb);
32017 /* Dispatch the default version at the end. */
32018 *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
32019 NULL, *empty_bb);
32021 free (function_version_info);
32022 return 0;
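/* Overall shape of the generated dispatcher, rendered as C for clarity (a
   sketch; the real body is built as GIMPLE above and the f_* symbols are
   illustrative).  Versions are tested in descending dispatch priority and
   the default version is dispatched last.  */
#if 0
extern int f_avx2 (void), f_avx (void), f_default (void);
void *
f_resolver (void)
{
  __builtin_cpu_init ();                      /* emitted before any test */
  if (__builtin_cpu_supports ("avx2") > 0)
    return (void *) f_avx2;
  if (__builtin_cpu_supports ("avx") > 0)
    return (void *) f_avx;
  return (void *) f_default;                  /* unconditional default */
}
#endif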
32025 /* Comparator function to be used in qsort routine to sort attribute
32026 specification strings to "target". */
32028 static int
32029 attr_strcmp (const void *v1, const void *v2)
32031 const char *c1 = *(char *const*)v1;
32032 const char *c2 = *(char *const*)v2;
32033 return strcmp (c1, c2);
32036 /* ARGLIST is the argument to the target attribute. This function tokenizes
32037 the comma separated arguments, sorts them and returns a string which
32038 is a unique identifier for the comma separated arguments. It also
32039 replaces non-identifier characters "=,-" with "_". */
32041 static char *
32042 sorted_attr_string (tree arglist)
32044 tree arg;
32045 size_t str_len_sum = 0;
32046 char **args = NULL;
32047 char *attr_str, *ret_str;
32048 char *attr = NULL;
32049 unsigned int argnum = 1;
32050 unsigned int i;
32052 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
32054 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
32055 size_t len = strlen (str);
32056 str_len_sum += len + 1;
32057 if (arg != arglist)
32058 argnum++;
32059 for (i = 0; i < strlen (str); i++)
32060 if (str[i] == ',')
32061 argnum++;
32064 attr_str = XNEWVEC (char, str_len_sum);
32065 str_len_sum = 0;
32066 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
32068 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
32069 size_t len = strlen (str);
32070 memcpy (attr_str + str_len_sum, str, len);
32071 attr_str[str_len_sum + len] = TREE_CHAIN (arg) ? ',' : '\0';
32072 str_len_sum += len + 1;
32075 /* Replace "=,-" with "_". */
32076 for (i = 0; i < strlen (attr_str); i++)
32077 if (attr_str[i] == '=' || attr_str[i]== '-')
32078 attr_str[i] = '_';
32080 if (argnum == 1)
32081 return attr_str;
32083 args = XNEWVEC (char *, argnum);
32085 i = 0;
32086 attr = strtok (attr_str, ",");
32087 while (attr != NULL)
32089 args[i] = attr;
32090 i++;
32091 attr = strtok (NULL, ",");
32094 qsort (args, argnum, sizeof (char *), attr_strcmp);
32096 ret_str = XNEWVEC (char, str_len_sum);
32097 str_len_sum = 0;
32098 for (i = 0; i < argnum; i++)
32100 size_t len = strlen (args[i]);
32101 memcpy (ret_str + str_len_sum, args[i], len);
32102 ret_str[str_len_sum + len] = i < argnum - 1 ? '_' : '\0';
32103 str_len_sum += len + 1;
32106 XDELETEVEC (args);
32107 XDELETEVEC (attr_str);
32108 return ret_str;
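/* Worked examples of the canonicalization above (a sketch derived from the
   code): target ("avx,popcnt") and target ("popcnt,avx") both yield
   "avx_popcnt" after sorting, and target ("arch=core2") yields "arch_core2"
   after the '=' is rewritten to '_', so equivalent attribute strings map to
   a single identifier.  */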
32111 /* This function changes the assembler name for functions that are
32112 versions. If DECL is a function version and has a "target"
32113 attribute, it appends the attribute string to its assembler name. */
32115 static tree
32116 ix86_mangle_function_version_assembler_name (tree decl, tree id)
32118 tree version_attr;
32119 const char *orig_name, *version_string;
32120 char *attr_str, *assembler_name;
32122 if (DECL_DECLARED_INLINE_P (decl)
32123 && lookup_attribute ("gnu_inline",
32124 DECL_ATTRIBUTES (decl)))
32125 error_at (DECL_SOURCE_LOCATION (decl),
32126 "Function versions cannot be marked as gnu_inline,"
32127 " bodies have to be generated");
32129 if (DECL_VIRTUAL_P (decl)
32130 || DECL_VINDEX (decl))
32131 sorry ("Virtual function multiversioning not supported");
32133 version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
32135 /* target attribute string cannot be NULL. */
32136 gcc_assert (version_attr != NULL_TREE);
32138 orig_name = IDENTIFIER_POINTER (id);
32139 version_string
32140 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
32142 if (strcmp (version_string, "default") == 0)
32143 return id;
32145 attr_str = sorted_attr_string (TREE_VALUE (version_attr));
32146 assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
32148 sprintf (assembler_name, "%s.%s", orig_name, attr_str);
32150 /* Allow assembler name to be modified if already set. */
32151 if (DECL_ASSEMBLER_NAME_SET_P (decl))
32152 SET_DECL_RTL (decl, NULL);
32154 tree ret = get_identifier (assembler_name);
32155 XDELETEVEC (attr_str);
32156 XDELETEVEC (assembler_name);
32157 return ret;
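/* For instance (a sketch following the code above), a version declared as
       __attribute__ ((target ("arch=core2"))) int foo (int);
   gets the assembler name "foo.arch_core2", while the "default" version
   keeps its original assembler name.  */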
32160 /* This function returns true if FN1 and FN2 are versions of the same function,
32161 that is, the target strings of the function decls are different. This assumes
32162 that FN1 and FN2 have the same signature. */
32164 static bool
32165 ix86_function_versions (tree fn1, tree fn2)
32167 tree attr1, attr2;
32168 char *target1, *target2;
32169 bool result;
32171 if (TREE_CODE (fn1) != FUNCTION_DECL
32172 || TREE_CODE (fn2) != FUNCTION_DECL)
32173 return false;
32175 attr1 = lookup_attribute ("target", DECL_ATTRIBUTES (fn1));
32176 attr2 = lookup_attribute ("target", DECL_ATTRIBUTES (fn2));
32178 /* At least one function decl should have the target attribute specified. */
32179 if (attr1 == NULL_TREE && attr2 == NULL_TREE)
32180 return false;
32182 /* Diagnose missing target attribute if one of the decls is already
32183 multi-versioned. */
32184 if (attr1 == NULL_TREE || attr2 == NULL_TREE)
32186 if (DECL_FUNCTION_VERSIONED (fn1) || DECL_FUNCTION_VERSIONED (fn2))
32188 if (attr2 != NULL_TREE)
32190 tree tem = fn1;
32191 fn1 = fn2;
32192 fn2 = tem;
32193 attr1 = attr2;
32195 error_at (DECL_SOURCE_LOCATION (fn2),
32196 "missing %<target%> attribute for multi-versioned %D",
32197 fn2);
32198 inform (DECL_SOURCE_LOCATION (fn1),
32199 "previous declaration of %D", fn1);
32200 /* Prevent diagnosing of the same error multiple times. */
32201 DECL_ATTRIBUTES (fn2)
32202 = tree_cons (get_identifier ("target"),
32203 copy_node (TREE_VALUE (attr1)),
32204 DECL_ATTRIBUTES (fn2));
32206 return false;
32209 target1 = sorted_attr_string (TREE_VALUE (attr1));
32210 target2 = sorted_attr_string (TREE_VALUE (attr2));
32212 /* The sorted target strings must be different for fn1 and fn2
32213 to be versions. */
32214 if (strcmp (target1, target2) == 0)
32215 result = false;
32216 else
32217 result = true;
32219 XDELETEVEC (target1);
32220 XDELETEVEC (target2);
32222 return result;
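/* A minimal user-level sketch of what this predicate classifies (C++
   front end; the names are assumptions):

     __attribute__ ((target ("default"))) int foo (void) { return 0; }
     __attribute__ ((target ("avx")))     int foo (void) { return 1; }

   The two declarations have the same signature but different sorted
   target strings, so they are treated as versions of one function.  */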
32225 static tree
32226 ix86_mangle_decl_assembler_name (tree decl, tree id)
32228 /* For function version, add the target suffix to the assembler name. */
32229 if (TREE_CODE (decl) == FUNCTION_DECL
32230 && DECL_FUNCTION_VERSIONED (decl))
32231 id = ix86_mangle_function_version_assembler_name (decl, id);
32232 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
32233 id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
32234 #endif
32236 return id;
32239 /* Return a new name by appending SUFFIX to the DECL name.  If MAKE_UNIQUE is
32240 true, also append a name derived from the source file to make the result unique. */
32242 static char *
32243 make_name (tree decl, const char *suffix, bool make_unique)
32245 char *global_var_name;
32246 int name_len;
32247 const char *name;
32248 const char *unique_name = NULL;
32250 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
32252 /* Get a unique name that can be used globally without any chances
32253 of collision at link time. */
32254 if (make_unique)
32255 unique_name = IDENTIFIER_POINTER (get_file_function_name ("\0"));
32257 name_len = strlen (name) + strlen (suffix) + 2;
32259 if (make_unique)
32260 name_len += strlen (unique_name) + 1;
32261 global_var_name = XNEWVEC (char, name_len);
32263 /* Use '.' to concatenate names as it is demangler friendly. */
32264 if (make_unique)
32265 snprintf (global_var_name, name_len, "%s.%s.%s", name, unique_name,
32266 suffix);
32267 else
32268 snprintf (global_var_name, name_len, "%s.%s", name, suffix);
32270 return global_var_name;
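/* For example (illustrative; the decl name is an assumption):
   make_name (foo_decl, "resolver", false) returns "foo.resolver", while
   make_unique == true also splices in the identifier produced by
   get_file_function_name, e.g. "foo.<file-id>.resolver", so two static
   versions in different translation units do not collide.  */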
32273 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
32275 /* Make a dispatcher declaration for the multi-versioned function DECL.
32276 Calls to the DECL function will be replaced with calls to the dispatcher
32277 by the front-end. Return the decl created. */
32279 static tree
32280 make_dispatcher_decl (const tree decl)
32282 tree func_decl;
32283 char *func_name;
32284 tree fn_type, func_type;
32285 bool is_uniq = false;
32287 if (TREE_PUBLIC (decl) == 0)
32288 is_uniq = true;
32290 func_name = make_name (decl, "ifunc", is_uniq);
32292 fn_type = TREE_TYPE (decl);
32293 func_type = build_function_type (TREE_TYPE (fn_type),
32294 TYPE_ARG_TYPES (fn_type));
32296 func_decl = build_fn_decl (func_name, func_type);
32297 XDELETEVEC (func_name);
32298 TREE_USED (func_decl) = 1;
32299 DECL_CONTEXT (func_decl) = NULL_TREE;
32300 DECL_INITIAL (func_decl) = error_mark_node;
32301 DECL_ARTIFICIAL (func_decl) = 1;
32302 /* Mark this func as external, the resolver will flip it again if
32303 it gets generated. */
32304 DECL_EXTERNAL (func_decl) = 1;
32305 /* IFUNCs have to be externally visible. */
32306 TREE_PUBLIC (func_decl) = 1;
32308 return func_decl;
32311 #endif
32313 /* Return true if DECL is a multi-versioned function and is the default
32314 version, that is, it is not tagged with a target-specific attribute. */
32316 static bool
32317 is_function_default_version (const tree decl)
32319 if (TREE_CODE (decl) != FUNCTION_DECL
32320 || !DECL_FUNCTION_VERSIONED (decl))
32321 return false;
32322 tree attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
32323 gcc_assert (attr);
32324 attr = TREE_VALUE (TREE_VALUE (attr));
32325 return (TREE_CODE (attr) == STRING_CST
32326 && strcmp (TREE_STRING_POINTER (attr), "default") == 0);
32329 /* Make a dispatcher declaration for the multi-versioned function DECL.
32330 Calls to the DECL function will be replaced with calls to the dispatcher
32331 by the front-end. Returns the decl of the dispatcher function. */
32333 static tree
32334 ix86_get_function_versions_dispatcher (void *decl)
32336 tree fn = (tree) decl;
32337 struct cgraph_node *node = NULL;
32338 struct cgraph_node *default_node = NULL;
32339 struct cgraph_function_version_info *node_v = NULL;
32340 struct cgraph_function_version_info *first_v = NULL;
32342 tree dispatch_decl = NULL;
32344 struct cgraph_function_version_info *default_version_info = NULL;
32346 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
32348 node = cgraph_get_node (fn);
32349 gcc_assert (node != NULL);
32351 node_v = get_cgraph_node_version (node);
32352 gcc_assert (node_v != NULL);
32354 if (node_v->dispatcher_resolver != NULL)
32355 return node_v->dispatcher_resolver;
32357 /* Find the default version and make it the first node. */
32358 first_v = node_v;
32359 /* Go to the beginning of the chain. */
32360 while (first_v->prev != NULL)
32361 first_v = first_v->prev;
32362 default_version_info = first_v;
32363 while (default_version_info != NULL)
32365 if (is_function_default_version
32366 (default_version_info->this_node->decl))
32367 break;
32368 default_version_info = default_version_info->next;
32371 /* If there is no default node, just return NULL. */
32372 if (default_version_info == NULL)
32373 return NULL;
32375 /* Make default info the first node. */
32376 if (first_v != default_version_info)
32378 default_version_info->prev->next = default_version_info->next;
32379 if (default_version_info->next)
32380 default_version_info->next->prev = default_version_info->prev;
32381 first_v->prev = default_version_info;
32382 default_version_info->next = first_v;
32383 default_version_info->prev = NULL;
32386 default_node = default_version_info->this_node;
32388 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
32389 if (targetm.has_ifunc_p ())
32391 struct cgraph_function_version_info *it_v = NULL;
32392 struct cgraph_node *dispatcher_node = NULL;
32393 struct cgraph_function_version_info *dispatcher_version_info = NULL;
32395 /* Right now, the dispatching is done via ifunc. */
32396 dispatch_decl = make_dispatcher_decl (default_node->decl);
32398 dispatcher_node = cgraph_get_create_node (dispatch_decl);
32399 gcc_assert (dispatcher_node != NULL);
32400 dispatcher_node->dispatcher_function = 1;
32401 dispatcher_version_info
32402 = insert_new_cgraph_node_version (dispatcher_node);
32403 dispatcher_version_info->next = default_version_info;
32404 dispatcher_node->definition = 1;
32406 /* Set the dispatcher for all the versions. */
32407 it_v = default_version_info;
32408 while (it_v != NULL)
32410 it_v->dispatcher_resolver = dispatch_decl;
32411 it_v = it_v->next;
32414 else
32415 #endif
32417 error_at (DECL_SOURCE_LOCATION (default_node->decl),
32418 "multiversioning needs ifunc which is not supported "
32419 "on this target");
32422 return dispatch_decl;
32425 /* Makes a function attribute of the form NAME(ARG_NAME) and chains
32426 it to CHAIN. */
32428 static tree
32429 make_attribute (const char *name, const char *arg_name, tree chain)
32431 tree attr_name;
32432 tree attr_arg_name;
32433 tree attr_args;
32434 tree attr;
32436 attr_name = get_identifier (name);
32437 attr_arg_name = build_string (strlen (arg_name), arg_name);
32438 attr_args = tree_cons (NULL_TREE, attr_arg_name, NULL_TREE);
32439 attr = tree_cons (attr_name, attr_args, chain);
32440 return attr;
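/* Sketch of the tree this builds (the names are assumptions): calling
   make_attribute ("ifunc", "foo.resolver", DECL_ATTRIBUTES (d)) chains
   the equivalent of __attribute__ ((ifunc ("foo.resolver"))) onto the
   existing attribute list of d.  */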
32443 /* Make the resolver function decl to dispatch the versions of
32444 a multi-versioned function, DEFAULT_DECL. Create an
32445 empty basic block in the resolver and store the pointer in
32446 EMPTY_BB. Return the decl of the resolver function. */
32448 static tree
32449 make_resolver_func (const tree default_decl,
32450 const tree dispatch_decl,
32451 basic_block *empty_bb)
32453 char *resolver_name;
32454 tree decl, type, decl_name, t;
32455 bool is_uniq = false;
32457 /* IFUNCs have to be globally visible. So, if the default_decl is
32458 not, then the name of the IFUNC should be made unique. */
32459 if (TREE_PUBLIC (default_decl) == 0)
32460 is_uniq = true;
32462 /* Append the filename to the resolver function if the versions are
32463 not externally visible. This is because the resolver function has
32464 to be externally visible for the loader to find it. So, appending
32465 the filename will prevent conflicts with a resolver function from
32466 another module which is based on the same version name. */
32467 resolver_name = make_name (default_decl, "resolver", is_uniq);
32469 /* The resolver function should return a (void *). */
32470 type = build_function_type_list (ptr_type_node, NULL_TREE);
32472 decl = build_fn_decl (resolver_name, type);
32473 decl_name = get_identifier (resolver_name);
32474 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
32476 DECL_NAME (decl) = decl_name;
32477 TREE_USED (decl) = 1;
32478 DECL_ARTIFICIAL (decl) = 1;
32479 DECL_IGNORED_P (decl) = 0;
32480 /* IFUNC resolvers have to be externally visible. */
32481 TREE_PUBLIC (decl) = 1;
32482 DECL_UNINLINABLE (decl) = 1;
32484 /* Resolver is not external, body is generated. */
32485 DECL_EXTERNAL (decl) = 0;
32486 DECL_EXTERNAL (dispatch_decl) = 0;
32488 DECL_CONTEXT (decl) = NULL_TREE;
32489 DECL_INITIAL (decl) = make_node (BLOCK);
32490 DECL_STATIC_CONSTRUCTOR (decl) = 0;
32492 if (DECL_COMDAT_GROUP (default_decl)
32493 || TREE_PUBLIC (default_decl))
32495 /* In this case, each translation unit with a call to this
32496 versioned function will put out a resolver. Ensure it
32497 is comdat to keep just one copy. */
32498 DECL_COMDAT (decl) = 1;
32499 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
32501 /* Build result decl and add to function_decl. */
32502 t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
32503 DECL_ARTIFICIAL (t) = 1;
32504 DECL_IGNORED_P (t) = 1;
32505 DECL_RESULT (decl) = t;
32507 gimplify_function_tree (decl);
32508 push_cfun (DECL_STRUCT_FUNCTION (decl));
32509 *empty_bb = init_lowered_empty_function (decl, false);
32511 cgraph_add_new_function (decl, true);
32512 cgraph_call_function_insertion_hooks (cgraph_get_create_node (decl));
32514 pop_cfun ();
32516 gcc_assert (dispatch_decl != NULL);
32517 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
32518 DECL_ATTRIBUTES (dispatch_decl)
32519 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
32521 /* Create the alias for dispatch to resolver here. */
32522 /*cgraph_create_function_alias (dispatch_decl, decl);*/
32523 cgraph_same_body_alias (NULL, dispatch_decl, decl);
32524 XDELETEVEC (resolver_name);
32525 return decl;
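/* What the result amounts to at the source level, as a hedged sketch
   (the names foo and foo_resolver are assumptions):

     static void *foo_resolver (void);
     int foo (void) __attribute__ ((ifunc ("foo_resolver")));

   i.e. the dispatcher becomes an IFUNC whose resolver is the comdat
   function created above; the resolver body itself is filled in by the
   caller via dispatch_function_versions.  */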
32528 /* Generate the dispatching code body to dispatch multi-versioned function
32529 DECL. The target hook is called to process the "target" attributes and
32530 provide the code to dispatch the right function at run-time. NODE points
32531 to the dispatcher decl whose body will be created. */
32533 static tree
32534 ix86_generate_version_dispatcher_body (void *node_p)
32536 tree resolver_decl;
32537 basic_block empty_bb;
32538 tree default_ver_decl;
32539 struct cgraph_node *versn;
32540 struct cgraph_node *node;
32542 struct cgraph_function_version_info *node_version_info = NULL;
32543 struct cgraph_function_version_info *versn_info = NULL;
32545 node = (cgraph_node *)node_p;
32547 node_version_info = get_cgraph_node_version (node);
32548 gcc_assert (node->dispatcher_function
32549 && node_version_info != NULL);
32551 if (node_version_info->dispatcher_resolver)
32552 return node_version_info->dispatcher_resolver;
32554 /* The first version in the chain corresponds to the default version. */
32555 default_ver_decl = node_version_info->next->this_node->decl;
32557 /* node is going to be an alias, so remove the finalized bit. */
32558 node->definition = false;
32560 resolver_decl = make_resolver_func (default_ver_decl,
32561 node->decl, &empty_bb);
32563 node_version_info->dispatcher_resolver = resolver_decl;
32565 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
32567 auto_vec<tree, 2> fn_ver_vec;
32569 for (versn_info = node_version_info->next; versn_info;
32570 versn_info = versn_info->next)
32572 versn = versn_info->this_node;
32573 /* Check for virtual functions here again, as by this time it should
32574 have been determined if this function needs a vtable index or
32575 not. This happens for methods in derived classes that override
32576 virtual methods in base classes but are not explicitly marked as
32577 virtual. */
32578 if (DECL_VINDEX (versn->decl))
32579 sorry ("Virtual function multiversioning not supported");
32581 fn_ver_vec.safe_push (versn->decl);
32584 dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
32585 rebuild_cgraph_edges ();
32586 pop_cfun ();
32587 return resolver_decl;
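/* Conceptually, the resolver body produced here behaves like this rough
   sketch (the version names are assumptions; the real body is built
   directly in GIMPLE by dispatch_function_versions):

     static void *
     foo_resolver (void)
     {
       if (__builtin_cpu_supports ("avx"))
         return (void *) foo_avx;
       return (void *) foo_default;
     }
*/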
32589 /* This builds the processor_model struct type defined in
32590 libgcc/config/i386/cpuinfo.c */
32592 static tree
32593 build_processor_model_struct (void)
32595 const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
32596 "__cpu_features"};
32597 tree field = NULL_TREE, field_chain = NULL_TREE;
32598 int i;
32599 tree type = make_node (RECORD_TYPE);
32601 /* The first 3 fields are unsigned int. */
32602 for (i = 0; i < 3; ++i)
32604 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
32605 get_identifier (field_name[i]), unsigned_type_node);
32606 if (field_chain != NULL_TREE)
32607 DECL_CHAIN (field) = field_chain;
32608 field_chain = field;
32611 /* The last field is an array of unsigned integers of size one. */
32612 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
32613 get_identifier (field_name[3]),
32614 build_array_type (unsigned_type_node,
32615 build_index_type (size_one_node)));
32616 if (field_chain != NULL_TREE)
32617 DECL_CHAIN (field) = field_chain;
32618 field_chain = field;
32620 finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
32621 return type;
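/* The layout built here is meant to match the object defined in
   libgcc/config/i386/cpuinfo.c, roughly:

     struct __processor_model
     {
       unsigned int __cpu_vendor;
       unsigned int __cpu_type;
       unsigned int __cpu_subtype;
       unsigned int __cpu_features[1];
     } __cpu_model;
*/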
32624 /* Return an extern, comdat VAR_DECL of type TYPE and name NAME. */
32626 static tree
32627 make_var_decl (tree type, const char *name)
32629 tree new_decl;
32631 new_decl = build_decl (UNKNOWN_LOCATION,
32632 VAR_DECL,
32633 get_identifier (name),
32634 type);
32636 DECL_EXTERNAL (new_decl) = 1;
32637 TREE_STATIC (new_decl) = 1;
32638 TREE_PUBLIC (new_decl) = 1;
32639 DECL_INITIAL (new_decl) = 0;
32640 DECL_ARTIFICIAL (new_decl) = 0;
32641 DECL_PRESERVE_P (new_decl) = 1;
32643 make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
32644 assemble_variable (new_decl, 0, 0, 0);
32646 return new_decl;
32649 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
32650 into a check of the __cpu_model variable defined in libgcc/config/i386/cpuinfo.c. */
32652 static tree
32653 fold_builtin_cpu (tree fndecl, tree *args)
32655 unsigned int i;
32656 enum ix86_builtins fn_code = (enum ix86_builtins)
32657 DECL_FUNCTION_CODE (fndecl);
32658 tree param_string_cst = NULL;
32660 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
32661 enum processor_features
32663 F_CMOV = 0,
32664 F_MMX,
32665 F_POPCNT,
32666 F_SSE,
32667 F_SSE2,
32668 F_SSE3,
32669 F_SSSE3,
32670 F_SSE4_1,
32671 F_SSE4_2,
32672 F_AVX,
32673 F_AVX2,
32674 F_SSE4_A,
32675 F_FMA4,
32676 F_XOP,
32677 F_FMA,
32678 F_MAX
32681 /* These are the values for vendor types, cpu types and subtypes
32682 in cpuinfo.c. Cpu types and subtypes have the corresponding
32683 start value subtracted before use. */
32684 enum processor_model
32686 M_INTEL = 1,
32687 M_AMD,
32688 M_CPU_TYPE_START,
32689 M_INTEL_BONNELL,
32690 M_INTEL_CORE2,
32691 M_INTEL_COREI7,
32692 M_AMDFAM10H,
32693 M_AMDFAM15H,
32694 M_INTEL_SILVERMONT,
32695 M_AMD_BTVER1,
32696 M_AMD_BTVER2,
32697 M_CPU_SUBTYPE_START,
32698 M_INTEL_COREI7_NEHALEM,
32699 M_INTEL_COREI7_WESTMERE,
32700 M_INTEL_COREI7_SANDYBRIDGE,
32701 M_AMDFAM10H_BARCELONA,
32702 M_AMDFAM10H_SHANGHAI,
32703 M_AMDFAM10H_ISTANBUL,
32704 M_AMDFAM15H_BDVER1,
32705 M_AMDFAM15H_BDVER2,
32706 M_AMDFAM15H_BDVER3,
32707 M_AMDFAM15H_BDVER4,
32708 M_INTEL_COREI7_IVYBRIDGE,
32709 M_INTEL_COREI7_HASWELL
32712 static struct _arch_names_table
32714 const char *const name;
32715 const enum processor_model model;
32717 const arch_names_table[] =
32719 {"amd", M_AMD},
32720 {"intel", M_INTEL},
32721 {"atom", M_INTEL_BONNELL},
32722 {"slm", M_INTEL_SILVERMONT},
32723 {"core2", M_INTEL_CORE2},
32724 {"corei7", M_INTEL_COREI7},
32725 {"nehalem", M_INTEL_COREI7_NEHALEM},
32726 {"westmere", M_INTEL_COREI7_WESTMERE},
32727 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
32728 {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
32729 {"haswell", M_INTEL_COREI7_HASWELL},
32730 {"bonnell", M_INTEL_BONNELL},
32731 {"silvermont", M_INTEL_SILVERMONT},
32732 {"amdfam10h", M_AMDFAM10H},
32733 {"barcelona", M_AMDFAM10H_BARCELONA},
32734 {"shanghai", M_AMDFAM10H_SHANGHAI},
32735 {"istanbul", M_AMDFAM10H_ISTANBUL},
32736 {"btver1", M_AMD_BTVER1},
32737 {"amdfam15h", M_AMDFAM15H},
32738 {"bdver1", M_AMDFAM15H_BDVER1},
32739 {"bdver2", M_AMDFAM15H_BDVER2},
32740 {"bdver3", M_AMDFAM15H_BDVER3},
32741 {"bdver4", M_AMDFAM15H_BDVER4},
32742 {"btver2", M_AMD_BTVER2},
32745 static struct _isa_names_table
32747 const char *const name;
32748 const enum processor_features feature;
32750 const isa_names_table[] =
32752 {"cmov", F_CMOV},
32753 {"mmx", F_MMX},
32754 {"popcnt", F_POPCNT},
32755 {"sse", F_SSE},
32756 {"sse2", F_SSE2},
32757 {"sse3", F_SSE3},
32758 {"ssse3", F_SSSE3},
32759 {"sse4a", F_SSE4_A},
32760 {"sse4.1", F_SSE4_1},
32761 {"sse4.2", F_SSE4_2},
32762 {"avx", F_AVX},
32763 {"fma4", F_FMA4},
32764 {"xop", F_XOP},
32765 {"fma", F_FMA},
32766 {"avx2", F_AVX2}
32769 tree __processor_model_type = build_processor_model_struct ();
32770 tree __cpu_model_var = make_var_decl (__processor_model_type,
32771 "__cpu_model");
32774 varpool_add_new_variable (__cpu_model_var);
32776 gcc_assert ((args != NULL) && (*args != NULL));
32778 param_string_cst = *args;
32779 while (param_string_cst
32780 && TREE_CODE (param_string_cst) != STRING_CST)
32782 /* *args must be an expr that can contain other EXPRs leading to a
32783 STRING_CST. */
32784 if (!EXPR_P (param_string_cst))
32786 error ("Parameter to builtin must be a string constant or literal");
32787 return integer_zero_node;
32789 param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
32792 gcc_assert (param_string_cst);
32794 if (fn_code == IX86_BUILTIN_CPU_IS)
32796 tree ref;
32797 tree field;
32798 tree final;
32800 unsigned int field_val = 0;
32801 unsigned int NUM_ARCH_NAMES
32802 = sizeof (arch_names_table) / sizeof (struct _arch_names_table);
32804 for (i = 0; i < NUM_ARCH_NAMES; i++)
32805 if (strcmp (arch_names_table[i].name,
32806 TREE_STRING_POINTER (param_string_cst)) == 0)
32807 break;
32809 if (i == NUM_ARCH_NAMES)
32811 error ("Parameter to builtin not valid: %s",
32812 TREE_STRING_POINTER (param_string_cst));
32813 return integer_zero_node;
32816 field = TYPE_FIELDS (__processor_model_type);
32817 field_val = arch_names_table[i].model;
32819 /* CPU types are stored in the next field. */
32820 if (field_val > M_CPU_TYPE_START
32821 && field_val < M_CPU_SUBTYPE_START)
32823 field = DECL_CHAIN (field);
32824 field_val -= M_CPU_TYPE_START;
32827 /* CPU subtypes are stored two fields past the vendor field. */
32828 if (field_val > M_CPU_SUBTYPE_START)
32830 field = DECL_CHAIN (DECL_CHAIN (field));
32831 field_val -= M_CPU_SUBTYPE_START;
32834 /* Get the appropriate field in __cpu_model. */
32835 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
32836 field, NULL_TREE);
32838 /* Check the value. */
32839 final = build2 (EQ_EXPR, unsigned_type_node, ref,
32840 build_int_cstu (unsigned_type_node, field_val));
32841 return build1 (CONVERT_EXPR, integer_type_node, final);
32843 else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
32845 tree ref;
32846 tree array_elt;
32847 tree field;
32848 tree final;
32850 unsigned int field_val = 0;
32851 unsigned int NUM_ISA_NAMES
32852 = sizeof (isa_names_table) / sizeof (struct _isa_names_table);
32854 for (i = 0; i < NUM_ISA_NAMES; i++)
32855 if (strcmp (isa_names_table[i].name,
32856 TREE_STRING_POINTER (param_string_cst)) == 0)
32857 break;
32859 if (i == NUM_ISA_NAMES)
32861 error ("Parameter to builtin not valid: %s",
32862 TREE_STRING_POINTER (param_string_cst));
32863 return integer_zero_node;
32866 field = TYPE_FIELDS (__processor_model_type);
32867 /* Get the last field, which is __cpu_features. */
32868 while (DECL_CHAIN (field))
32869 field = DECL_CHAIN (field);
32871 /* Get the appropriate field: __cpu_model.__cpu_features */
32872 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
32873 field, NULL_TREE);
32875 /* Access the 0th element of __cpu_features array. */
32876 array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
32877 integer_zero_node, NULL_TREE, NULL_TREE);
32879 field_val = (1 << isa_names_table[i].feature);
32880 /* Return __cpu_model.__cpu_features[0] & field_val */
32881 final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
32882 build_int_cstu (unsigned_type_node, field_val));
32883 return build1 (CONVERT_EXPR, integer_type_node, final);
32885 gcc_unreachable ();
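/* Two illustrative foldings performed above (values follow the enums in
   this function; the snippets are sketches, not generated source):

     __builtin_cpu_is ("amd")
       -> (int) (__cpu_model.__cpu_vendor == M_AMD)

     __builtin_cpu_supports ("avx")
       -> (int) (__cpu_model.__cpu_features[0] & (1 << F_AVX))
*/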
32888 static tree
32889 ix86_fold_builtin (tree fndecl, int n_args,
32890 tree *args, bool ignore ATTRIBUTE_UNUSED)
32892 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
32894 enum ix86_builtins fn_code = (enum ix86_builtins)
32895 DECL_FUNCTION_CODE (fndecl);
32896 if (fn_code == IX86_BUILTIN_CPU_IS
32897 || fn_code == IX86_BUILTIN_CPU_SUPPORTS)
32899 gcc_assert (n_args == 1);
32900 return fold_builtin_cpu (fndecl, args);
32904 #ifdef SUBTARGET_FOLD_BUILTIN
32905 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
32906 #endif
32908 return NULL_TREE;
32911 /* Make builtins to detect cpu type and features supported. NAME is
32912 the builtin name, CODE is the builtin code, and FTYPE is the function
32913 type of the builtin. */
32915 static void
32916 make_cpu_type_builtin (const char* name, int code,
32917 enum ix86_builtin_func_type ftype, bool is_const)
32919 tree decl;
32920 tree type;
32922 type = ix86_get_builtin_func_type (ftype);
32923 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
32924 NULL, NULL_TREE);
32925 gcc_assert (decl != NULL_TREE);
32926 ix86_builtins[(int) code] = decl;
32927 TREE_READONLY (decl) = is_const;
32930 /* Make builtins to get CPU type and features supported. The created
32931 builtins are:
32933 __builtin_cpu_init (), to detect cpu type and features,
32934 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
32935 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
32938 static void
32939 ix86_init_platform_type_builtins (void)
32941 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
32942 INT_FTYPE_VOID, false);
32943 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
32944 INT_FTYPE_PCCHAR, true);
32945 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
32946 INT_FTYPE_PCCHAR, true);
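/* A minimal caller sketch for the three builtins created above (user
   code; the function name and return values are assumptions):

     int
     pick_impl (void)
     {
       __builtin_cpu_init ();
       if (__builtin_cpu_supports ("avx2"))
         return 2;
       if (__builtin_cpu_is ("corei7"))
         return 1;
       return 0;
     }
*/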
32949 /* Internal method for ix86_init_builtins. */
32951 static void
32952 ix86_init_builtins_va_builtins_abi (void)
32954 tree ms_va_ref, sysv_va_ref;
32955 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
32956 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
32957 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
32958 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
32960 if (!TARGET_64BIT)
32961 return;
32962 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
32963 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
32964 ms_va_ref = build_reference_type (ms_va_list_type_node);
32965 sysv_va_ref =
32966 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
32968 fnvoid_va_end_ms =
32969 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
32970 fnvoid_va_start_ms =
32971 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
32972 fnvoid_va_end_sysv =
32973 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
32974 fnvoid_va_start_sysv =
32975 build_varargs_function_type_list (void_type_node, sysv_va_ref,
32976 NULL_TREE);
32977 fnvoid_va_copy_ms =
32978 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
32979 NULL_TREE);
32980 fnvoid_va_copy_sysv =
32981 build_function_type_list (void_type_node, sysv_va_ref,
32982 sysv_va_ref, NULL_TREE);
32984 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
32985 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
32986 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
32987 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
32988 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
32989 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
32990 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
32991 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32992 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
32993 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32994 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
32995 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
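/* Hedged usage sketch for the ms_abi variants registered above (the
   function in the sketch is an assumption about user code):

     int __attribute__ ((ms_abi))
     msabi_sum (int n, ...)
     {
       __builtin_ms_va_list ap;
       int i, s = 0;
       __builtin_ms_va_start (ap, n);
       for (i = 0; i < n; i++)
         s += __builtin_va_arg (ap, int);
       __builtin_ms_va_end (ap);
       return s;
     }
*/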
32998 static void
32999 ix86_init_builtin_types (void)
33001 tree float128_type_node, float80_type_node;
33003 /* The __float80 type. */
33004 float80_type_node = long_double_type_node;
33005 if (TYPE_MODE (float80_type_node) != XFmode)
33007 /* The __float80 type. */
33008 float80_type_node = make_node (REAL_TYPE);
33010 TYPE_PRECISION (float80_type_node) = 80;
33011 layout_type (float80_type_node);
33013 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
33015 /* The __float128 type. */
33016 float128_type_node = make_node (REAL_TYPE);
33017 TYPE_PRECISION (float128_type_node) = 128;
33018 layout_type (float128_type_node);
33019 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
33021 /* This macro is built by i386-builtin-types.awk. */
33022 DEFINE_BUILTIN_PRIMITIVE_TYPES;
33025 static void
33026 ix86_init_builtins (void)
33028 tree t;
33030 ix86_init_builtin_types ();
33032 /* Builtins to get CPU type and features. */
33033 ix86_init_platform_type_builtins ();
33035 /* TFmode support builtins. */
33036 def_builtin_const (0, "__builtin_infq",
33037 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
33038 def_builtin_const (0, "__builtin_huge_valq",
33039 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
33041 /* We will expand them to a normal call if SSE isn't available since
33042 they are used by libgcc. */
33043 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
33044 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
33045 BUILT_IN_MD, "__fabstf2", NULL_TREE);
33046 TREE_READONLY (t) = 1;
33047 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
33049 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
33050 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
33051 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
33052 TREE_READONLY (t) = 1;
33053 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
33055 ix86_init_tm_builtins ();
33056 ix86_init_mmx_sse_builtins ();
33057 ix86_init_mpx_builtins ();
33059 if (TARGET_LP64)
33060 ix86_init_builtins_va_builtins_abi ();
33062 #ifdef SUBTARGET_INIT_BUILTINS
33063 SUBTARGET_INIT_BUILTINS;
33064 #endif
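/* Usage sketch for the TFmode builtins registered above (user code; the
   function and variable names are assumptions):

     __float128
     magnitude_with_sign (__float128 x, __float128 y)
     {
       __float128 mag = __builtin_fabsq (x);
       return __builtin_copysignq (mag, y);
     }
*/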
33067 /* Return the ix86 builtin for CODE. */
33069 static tree
33070 ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
33072 if (code >= IX86_BUILTIN_MAX)
33073 return error_mark_node;
33075 return ix86_builtins[code];
33078 /* Errors in the source file can cause expand_expr to return const0_rtx
33079 where we expect a vector. To avoid crashing, use one of the vector
33080 clear instructions. */
33081 static rtx
33082 safe_vector_operand (rtx x, enum machine_mode mode)
33084 if (x == const0_rtx)
33085 x = CONST0_RTX (mode);
33086 return x;
33089 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
33091 static rtx
33092 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
33094 rtx pat;
33095 tree arg0 = CALL_EXPR_ARG (exp, 0);
33096 tree arg1 = CALL_EXPR_ARG (exp, 1);
33097 rtx op0 = expand_normal (arg0);
33098 rtx op1 = expand_normal (arg1);
33099 enum machine_mode tmode = insn_data[icode].operand[0].mode;
33100 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
33101 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
33103 if (VECTOR_MODE_P (mode0))
33104 op0 = safe_vector_operand (op0, mode0);
33105 if (VECTOR_MODE_P (mode1))
33106 op1 = safe_vector_operand (op1, mode1);
33108 if (optimize || !target
33109 || GET_MODE (target) != tmode
33110 || !insn_data[icode].operand[0].predicate (target, tmode))
33111 target = gen_reg_rtx (tmode);
33113 if (GET_MODE (op1) == SImode && mode1 == TImode)
33115 rtx x = gen_reg_rtx (V4SImode);
33116 emit_insn (gen_sse2_loadd (x, op1));
33117 op1 = gen_lowpart (TImode, x);
33120 if (!insn_data[icode].operand[1].predicate (op0, mode0))
33121 op0 = copy_to_mode_reg (mode0, op0);
33122 if (!insn_data[icode].operand[2].predicate (op1, mode1))
33123 op1 = copy_to_mode_reg (mode1, op1);
33125 pat = GEN_FCN (icode) (target, op0, op1);
33126 if (! pat)
33127 return 0;
33129 emit_insn (pat);
33131 return target;
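/* A typical builtin routed through this helper, sketched from the user
   side (the intrinsic-to-builtin mapping lives in emmintrin.h and is an
   assumption here):

     #include <emmintrin.h>

     __m128i
     add_words (__m128i a, __m128i b)
     {
       return _mm_add_epi16 (a, b);
     }

   The intrinsic expands to a two-operand MD builtin, so it reaches
   ix86_expand_binop_builtin with both operands in vector modes.  */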
33134 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
33136 static rtx
33137 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
33138 enum ix86_builtin_func_type m_type,
33139 enum rtx_code sub_code)
33141 rtx pat;
33142 int i;
33143 int nargs;
33144 bool comparison_p = false;
33145 bool tf_p = false;
33146 bool last_arg_constant = false;
33147 int num_memory = 0;
33148 struct {
33149 rtx op;
33150 enum machine_mode mode;
33151 } args[4];
33153 enum machine_mode tmode = insn_data[icode].operand[0].mode;
33155 switch (m_type)
33157 case MULTI_ARG_4_DF2_DI_I:
33158 case MULTI_ARG_4_DF2_DI_I1:
33159 case MULTI_ARG_4_SF2_SI_I:
33160 case MULTI_ARG_4_SF2_SI_I1:
33161 nargs = 4;
33162 last_arg_constant = true;
33163 break;
33165 case MULTI_ARG_3_SF:
33166 case MULTI_ARG_3_DF:
33167 case MULTI_ARG_3_SF2:
33168 case MULTI_ARG_3_DF2:
33169 case MULTI_ARG_3_DI:
33170 case MULTI_ARG_3_SI:
33171 case MULTI_ARG_3_SI_DI:
33172 case MULTI_ARG_3_HI:
33173 case MULTI_ARG_3_HI_SI:
33174 case MULTI_ARG_3_QI:
33175 case MULTI_ARG_3_DI2:
33176 case MULTI_ARG_3_SI2:
33177 case MULTI_ARG_3_HI2:
33178 case MULTI_ARG_3_QI2:
33179 nargs = 3;
33180 break;
33182 case MULTI_ARG_2_SF:
33183 case MULTI_ARG_2_DF:
33184 case MULTI_ARG_2_DI:
33185 case MULTI_ARG_2_SI:
33186 case MULTI_ARG_2_HI:
33187 case MULTI_ARG_2_QI:
33188 nargs = 2;
33189 break;
33191 case MULTI_ARG_2_DI_IMM:
33192 case MULTI_ARG_2_SI_IMM:
33193 case MULTI_ARG_2_HI_IMM:
33194 case MULTI_ARG_2_QI_IMM:
33195 nargs = 2;
33196 last_arg_constant = true;
33197 break;
33199 case MULTI_ARG_1_SF:
33200 case MULTI_ARG_1_DF:
33201 case MULTI_ARG_1_SF2:
33202 case MULTI_ARG_1_DF2:
33203 case MULTI_ARG_1_DI:
33204 case MULTI_ARG_1_SI:
33205 case MULTI_ARG_1_HI:
33206 case MULTI_ARG_1_QI:
33207 case MULTI_ARG_1_SI_DI:
33208 case MULTI_ARG_1_HI_DI:
33209 case MULTI_ARG_1_HI_SI:
33210 case MULTI_ARG_1_QI_DI:
33211 case MULTI_ARG_1_QI_SI:
33212 case MULTI_ARG_1_QI_HI:
33213 nargs = 1;
33214 break;
33216 case MULTI_ARG_2_DI_CMP:
33217 case MULTI_ARG_2_SI_CMP:
33218 case MULTI_ARG_2_HI_CMP:
33219 case MULTI_ARG_2_QI_CMP:
33220 nargs = 2;
33221 comparison_p = true;
33222 break;
33224 case MULTI_ARG_2_SF_TF:
33225 case MULTI_ARG_2_DF_TF:
33226 case MULTI_ARG_2_DI_TF:
33227 case MULTI_ARG_2_SI_TF:
33228 case MULTI_ARG_2_HI_TF:
33229 case MULTI_ARG_2_QI_TF:
33230 nargs = 2;
33231 tf_p = true;
33232 break;
33234 default:
33235 gcc_unreachable ();
33238 if (optimize || !target
33239 || GET_MODE (target) != tmode
33240 || !insn_data[icode].operand[0].predicate (target, tmode))
33241 target = gen_reg_rtx (tmode);
33243 gcc_assert (nargs <= 4);
33245 for (i = 0; i < nargs; i++)
33247 tree arg = CALL_EXPR_ARG (exp, i);
33248 rtx op = expand_normal (arg);
33249 int adjust = (comparison_p) ? 1 : 0;
33250 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
33252 if (last_arg_constant && i == nargs - 1)
33254 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
33256 enum insn_code new_icode = icode;
33257 switch (icode)
33259 case CODE_FOR_xop_vpermil2v2df3:
33260 case CODE_FOR_xop_vpermil2v4sf3:
33261 case CODE_FOR_xop_vpermil2v4df3:
33262 case CODE_FOR_xop_vpermil2v8sf3:
33263 error ("the last argument must be a 2-bit immediate");
33264 return gen_reg_rtx (tmode);
33265 case CODE_FOR_xop_rotlv2di3:
33266 new_icode = CODE_FOR_rotlv2di3;
33267 goto xop_rotl;
33268 case CODE_FOR_xop_rotlv4si3:
33269 new_icode = CODE_FOR_rotlv4si3;
33270 goto xop_rotl;
33271 case CODE_FOR_xop_rotlv8hi3:
33272 new_icode = CODE_FOR_rotlv8hi3;
33273 goto xop_rotl;
33274 case CODE_FOR_xop_rotlv16qi3:
33275 new_icode = CODE_FOR_rotlv16qi3;
33276 xop_rotl:
33277 if (CONST_INT_P (op))
33279 int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
33280 op = GEN_INT (INTVAL (op) & mask);
33281 gcc_checking_assert
33282 (insn_data[icode].operand[i + 1].predicate (op, mode));
33284 else
33286 gcc_checking_assert
33287 (nargs == 2
33288 && insn_data[new_icode].operand[0].mode == tmode
33289 && insn_data[new_icode].operand[1].mode == tmode
33290 && insn_data[new_icode].operand[2].mode == mode
33291 && insn_data[new_icode].operand[0].predicate
33292 == insn_data[icode].operand[0].predicate
33293 && insn_data[new_icode].operand[1].predicate
33294 == insn_data[icode].operand[1].predicate);
33295 icode = new_icode;
33296 goto non_constant;
33298 break;
33299 default:
33300 gcc_unreachable ();
33304 else
33306 non_constant:
33307 if (VECTOR_MODE_P (mode))
33308 op = safe_vector_operand (op, mode);
33310 /* If we aren't optimizing, only allow one memory operand to be
33311 generated. */
33312 if (memory_operand (op, mode))
33313 num_memory++;
33315 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
33317 if (optimize
33318 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
33319 || num_memory > 1)
33320 op = force_reg (mode, op);
33323 args[i].op = op;
33324 args[i].mode = mode;
33327 switch (nargs)
33329 case 1:
33330 pat = GEN_FCN (icode) (target, args[0].op);
33331 break;
33333 case 2:
33334 if (tf_p)
33335 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
33336 GEN_INT ((int) sub_code));
33337 else if (! comparison_p)
33338 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
33339 else
33341 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
33342 args[0].op,
33343 args[1].op);
33345 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
33347 break;
33349 case 3:
33350 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
33351 break;
33353 case 4:
33354 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
33355 break;
33357 default:
33358 gcc_unreachable ();
33361 if (! pat)
33362 return 0;
33364 emit_insn (pat);
33365 return target;
33368 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
33369 insns with vec_merge. */
33371 static rtx
33372 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
33373 rtx target)
33375 rtx pat;
33376 tree arg0 = CALL_EXPR_ARG (exp, 0);
33377 rtx op1, op0 = expand_normal (arg0);
33378 enum machine_mode tmode = insn_data[icode].operand[0].mode;
33379 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
33381 if (optimize || !target
33382 || GET_MODE (target) != tmode
33383 || !insn_data[icode].operand[0].predicate (target, tmode))
33384 target = gen_reg_rtx (tmode);
33386 if (VECTOR_MODE_P (mode0))
33387 op0 = safe_vector_operand (op0, mode0);
33389 if ((optimize && !register_operand (op0, mode0))
33390 || !insn_data[icode].operand[1].predicate (op0, mode0))
33391 op0 = copy_to_mode_reg (mode0, op0);
33393 op1 = op0;
33394 if (!insn_data[icode].operand[2].predicate (op1, mode0))
33395 op1 = copy_to_mode_reg (mode0, op1);
33397 pat = GEN_FCN (icode) (target, op0, op1);
33398 if (! pat)
33399 return 0;
33400 emit_insn (pat);
33401 return target;
33404 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
33406 static rtx
33407 ix86_expand_sse_compare (const struct builtin_description *d,
33408 tree exp, rtx target, bool swap)
33410 rtx pat;
33411 tree arg0 = CALL_EXPR_ARG (exp, 0);
33412 tree arg1 = CALL_EXPR_ARG (exp, 1);
33413 rtx op0 = expand_normal (arg0);
33414 rtx op1 = expand_normal (arg1);
33415 rtx op2;
33416 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33417 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33418 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
33419 enum rtx_code comparison = d->comparison;
33421 if (VECTOR_MODE_P (mode0))
33422 op0 = safe_vector_operand (op0, mode0);
33423 if (VECTOR_MODE_P (mode1))
33424 op1 = safe_vector_operand (op1, mode1);
33426 /* Swap operands if we have a comparison that isn't available in
33427 hardware. */
33428 if (swap)
33430 rtx tmp = gen_reg_rtx (mode1);
33431 emit_move_insn (tmp, op1);
33432 op1 = op0;
33433 op0 = tmp;
33436 if (optimize || !target
33437 || GET_MODE (target) != tmode
33438 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33439 target = gen_reg_rtx (tmode);
33441 if ((optimize && !register_operand (op0, mode0))
33442 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
33443 op0 = copy_to_mode_reg (mode0, op0);
33444 if ((optimize && !register_operand (op1, mode1))
33445 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
33446 op1 = copy_to_mode_reg (mode1, op1);
33448 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
33449 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
33450 if (! pat)
33451 return 0;
33452 emit_insn (pat);
33453 return target;
33456 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
33458 static rtx
33459 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
33460 rtx target)
33462 rtx pat;
33463 tree arg0 = CALL_EXPR_ARG (exp, 0);
33464 tree arg1 = CALL_EXPR_ARG (exp, 1);
33465 rtx op0 = expand_normal (arg0);
33466 rtx op1 = expand_normal (arg1);
33467 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
33468 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
33469 enum rtx_code comparison = d->comparison;
33471 if (VECTOR_MODE_P (mode0))
33472 op0 = safe_vector_operand (op0, mode0);
33473 if (VECTOR_MODE_P (mode1))
33474 op1 = safe_vector_operand (op1, mode1);
33476 /* Swap operands if we have a comparison that isn't available in
33477 hardware. */
33478 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
33480 rtx tmp = op1;
33481 op1 = op0;
33482 op0 = tmp;
33485 target = gen_reg_rtx (SImode);
33486 emit_move_insn (target, const0_rtx);
33487 target = gen_rtx_SUBREG (QImode, target, 0);
33489 if ((optimize && !register_operand (op0, mode0))
33490 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33491 op0 = copy_to_mode_reg (mode0, op0);
33492 if ((optimize && !register_operand (op1, mode1))
33493 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33494 op1 = copy_to_mode_reg (mode1, op1);
33496 pat = GEN_FCN (d->icode) (op0, op1);
33497 if (! pat)
33498 return 0;
33499 emit_insn (pat);
33500 emit_insn (gen_rtx_SET (VOIDmode,
33501 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33502 gen_rtx_fmt_ee (comparison, QImode,
33503 SET_DEST (pat),
33504 const0_rtx)));
33506 return SUBREG_REG (target);
33509 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
33511 static rtx
33512 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
33513 rtx target)
33515 rtx pat;
33516 tree arg0 = CALL_EXPR_ARG (exp, 0);
33517 rtx op1, op0 = expand_normal (arg0);
33518 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33519 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33521 if (optimize || target == 0
33522 || GET_MODE (target) != tmode
33523 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33524 target = gen_reg_rtx (tmode);
33526 if (VECTOR_MODE_P (mode0))
33527 op0 = safe_vector_operand (op0, mode0);
33529 if ((optimize && !register_operand (op0, mode0))
33530 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33531 op0 = copy_to_mode_reg (mode0, op0);
33533 op1 = GEN_INT (d->comparison);
33535 pat = GEN_FCN (d->icode) (target, op0, op1);
33536 if (! pat)
33537 return 0;
33538 emit_insn (pat);
33539 return target;
33542 static rtx
33543 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
33544 tree exp, rtx target)
33546 rtx pat;
33547 tree arg0 = CALL_EXPR_ARG (exp, 0);
33548 tree arg1 = CALL_EXPR_ARG (exp, 1);
33549 rtx op0 = expand_normal (arg0);
33550 rtx op1 = expand_normal (arg1);
33551 rtx op2;
33552 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33553 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33554 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
33556 if (optimize || target == 0
33557 || GET_MODE (target) != tmode
33558 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33559 target = gen_reg_rtx (tmode);
33561 op0 = safe_vector_operand (op0, mode0);
33562 op1 = safe_vector_operand (op1, mode1);
33564 if ((optimize && !register_operand (op0, mode0))
33565 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33566 op0 = copy_to_mode_reg (mode0, op0);
33567 if ((optimize && !register_operand (op1, mode1))
33568 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33569 op1 = copy_to_mode_reg (mode1, op1);
33571 op2 = GEN_INT (d->comparison);
33573 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
33574 if (! pat)
33575 return 0;
33576 emit_insn (pat);
33577 return target;
33580 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
33582 static rtx
33583 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
33584 rtx target)
33586 rtx pat;
33587 tree arg0 = CALL_EXPR_ARG (exp, 0);
33588 tree arg1 = CALL_EXPR_ARG (exp, 1);
33589 rtx op0 = expand_normal (arg0);
33590 rtx op1 = expand_normal (arg1);
33591 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
33592 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
33593 enum rtx_code comparison = d->comparison;
33595 if (VECTOR_MODE_P (mode0))
33596 op0 = safe_vector_operand (op0, mode0);
33597 if (VECTOR_MODE_P (mode1))
33598 op1 = safe_vector_operand (op1, mode1);
33600 target = gen_reg_rtx (SImode);
33601 emit_move_insn (target, const0_rtx);
33602 target = gen_rtx_SUBREG (QImode, target, 0);
33604 if ((optimize && !register_operand (op0, mode0))
33605 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33606 op0 = copy_to_mode_reg (mode0, op0);
33607 if ((optimize && !register_operand (op1, mode1))
33608 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33609 op1 = copy_to_mode_reg (mode1, op1);
33611 pat = GEN_FCN (d->icode) (op0, op1);
33612 if (! pat)
33613 return 0;
33614 emit_insn (pat);
33615 emit_insn (gen_rtx_SET (VOIDmode,
33616 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33617 gen_rtx_fmt_ee (comparison, QImode,
33618 SET_DEST (pat),
33619 const0_rtx)));
33621 return SUBREG_REG (target);
33624 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
33626 static rtx
33627 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
33628 tree exp, rtx target)
33630 rtx pat;
33631 tree arg0 = CALL_EXPR_ARG (exp, 0);
33632 tree arg1 = CALL_EXPR_ARG (exp, 1);
33633 tree arg2 = CALL_EXPR_ARG (exp, 2);
33634 tree arg3 = CALL_EXPR_ARG (exp, 3);
33635 tree arg4 = CALL_EXPR_ARG (exp, 4);
33636 rtx scratch0, scratch1;
33637 rtx op0 = expand_normal (arg0);
33638 rtx op1 = expand_normal (arg1);
33639 rtx op2 = expand_normal (arg2);
33640 rtx op3 = expand_normal (arg3);
33641 rtx op4 = expand_normal (arg4);
33642 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
33644 tmode0 = insn_data[d->icode].operand[0].mode;
33645 tmode1 = insn_data[d->icode].operand[1].mode;
33646 modev2 = insn_data[d->icode].operand[2].mode;
33647 modei3 = insn_data[d->icode].operand[3].mode;
33648 modev4 = insn_data[d->icode].operand[4].mode;
33649 modei5 = insn_data[d->icode].operand[5].mode;
33650 modeimm = insn_data[d->icode].operand[6].mode;
33652 if (VECTOR_MODE_P (modev2))
33653 op0 = safe_vector_operand (op0, modev2);
33654 if (VECTOR_MODE_P (modev4))
33655 op2 = safe_vector_operand (op2, modev4);
33657 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
33658 op0 = copy_to_mode_reg (modev2, op0);
33659 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
33660 op1 = copy_to_mode_reg (modei3, op1);
33661 if ((optimize && !register_operand (op2, modev4))
33662 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
33663 op2 = copy_to_mode_reg (modev4, op2);
33664 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
33665 op3 = copy_to_mode_reg (modei5, op3);
33667 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
33669 error ("the fifth argument must be an 8-bit immediate");
33670 return const0_rtx;
33673 if (d->code == IX86_BUILTIN_PCMPESTRI128)
33675 if (optimize || !target
33676 || GET_MODE (target) != tmode0
33677 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
33678 target = gen_reg_rtx (tmode0);
33680 scratch1 = gen_reg_rtx (tmode1);
33682 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
33684 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
33686 if (optimize || !target
33687 || GET_MODE (target) != tmode1
33688 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
33689 target = gen_reg_rtx (tmode1);
33691 scratch0 = gen_reg_rtx (tmode0);
33693 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
33695 else
33697 gcc_assert (d->flag);
33699 scratch0 = gen_reg_rtx (tmode0);
33700 scratch1 = gen_reg_rtx (tmode1);
33702 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
33705 if (! pat)
33706 return 0;
33708 emit_insn (pat);
33710 if (d->flag)
33712 target = gen_reg_rtx (SImode);
33713 emit_move_insn (target, const0_rtx);
33714 target = gen_rtx_SUBREG (QImode, target, 0);
33716 emit_insn
33717 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33718 gen_rtx_fmt_ee (EQ, QImode,
33719 gen_rtx_REG ((enum machine_mode) d->flag,
33720 FLAGS_REG),
33721 const0_rtx)));
33722 return SUBREG_REG (target);
33724 else
33725 return target;
33729 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
33731 static rtx
33732 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
33733 tree exp, rtx target)
33735 rtx pat;
33736 tree arg0 = CALL_EXPR_ARG (exp, 0);
33737 tree arg1 = CALL_EXPR_ARG (exp, 1);
33738 tree arg2 = CALL_EXPR_ARG (exp, 2);
33739 rtx scratch0, scratch1;
33740 rtx op0 = expand_normal (arg0);
33741 rtx op1 = expand_normal (arg1);
33742 rtx op2 = expand_normal (arg2);
33743 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
33745 tmode0 = insn_data[d->icode].operand[0].mode;
33746 tmode1 = insn_data[d->icode].operand[1].mode;
33747 modev2 = insn_data[d->icode].operand[2].mode;
33748 modev3 = insn_data[d->icode].operand[3].mode;
33749 modeimm = insn_data[d->icode].operand[4].mode;
33751 if (VECTOR_MODE_P (modev2))
33752 op0 = safe_vector_operand (op0, modev2);
33753 if (VECTOR_MODE_P (modev3))
33754 op1 = safe_vector_operand (op1, modev3);
33756 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
33757 op0 = copy_to_mode_reg (modev2, op0);
33758 if ((optimize && !register_operand (op1, modev3))
33759 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
33760 op1 = copy_to_mode_reg (modev3, op1);
33762 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
33764 error ("the third argument must be an 8-bit immediate");
33765 return const0_rtx;
33768 if (d->code == IX86_BUILTIN_PCMPISTRI128)
33770 if (optimize || !target
33771 || GET_MODE (target) != tmode0
33772 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
33773 target = gen_reg_rtx (tmode0);
33775 scratch1 = gen_reg_rtx (tmode1);
33777 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
33779 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
33781 if (optimize || !target
33782 || GET_MODE (target) != tmode1
33783 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
33784 target = gen_reg_rtx (tmode1);
33786 scratch0 = gen_reg_rtx (tmode0);
33788 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
33790 else
33792 gcc_assert (d->flag);
33794 scratch0 = gen_reg_rtx (tmode0);
33795 scratch1 = gen_reg_rtx (tmode1);
33797 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
33800 if (! pat)
33801 return 0;
33803 emit_insn (pat);
33805 if (d->flag)
33807 target = gen_reg_rtx (SImode);
33808 emit_move_insn (target, const0_rtx);
33809 target = gen_rtx_SUBREG (QImode, target, 0);
33811 emit_insn
33812 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33813 gen_rtx_fmt_ee (EQ, QImode,
33814 gen_rtx_REG ((enum machine_mode) d->flag,
33815 FLAGS_REG),
33816 const0_rtx)));
33817 return SUBREG_REG (target);
33819 else
33820 return target;
33823 /* Subroutine of ix86_expand_builtin to take care of insns with
33824 variable number of operands. */
33826 static rtx
33827 ix86_expand_args_builtin (const struct builtin_description *d,
33828 tree exp, rtx target)
33830 rtx pat, real_target;
33831 unsigned int i, nargs;
33832 unsigned int nargs_constant = 0;
33833 unsigned int mask_pos = 0;
33834 int num_memory = 0;
33835 struct
33837 rtx op;
33838 enum machine_mode mode;
33839 } args[6];
33840 bool last_arg_count = false;
33841 enum insn_code icode = d->icode;
33842 const struct insn_data_d *insn_p = &insn_data[icode];
33843 enum machine_mode tmode = insn_p->operand[0].mode;
33844 enum machine_mode rmode = VOIDmode;
33845 bool swap = false;
33846 enum rtx_code comparison = d->comparison;
33848 switch ((enum ix86_builtin_func_type) d->flag)
33850 case V2DF_FTYPE_V2DF_ROUND:
33851 case V4DF_FTYPE_V4DF_ROUND:
33852 case V4SF_FTYPE_V4SF_ROUND:
33853 case V8SF_FTYPE_V8SF_ROUND:
33854 case V4SI_FTYPE_V4SF_ROUND:
33855 case V8SI_FTYPE_V8SF_ROUND:
33856 return ix86_expand_sse_round (d, exp, target);
33857 case V4SI_FTYPE_V2DF_V2DF_ROUND:
33858 case V8SI_FTYPE_V4DF_V4DF_ROUND:
33859 case V16SI_FTYPE_V8DF_V8DF_ROUND:
33860 return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
33861 case INT_FTYPE_V8SF_V8SF_PTEST:
33862 case INT_FTYPE_V4DI_V4DI_PTEST:
33863 case INT_FTYPE_V4DF_V4DF_PTEST:
33864 case INT_FTYPE_V4SF_V4SF_PTEST:
33865 case INT_FTYPE_V2DI_V2DI_PTEST:
33866 case INT_FTYPE_V2DF_V2DF_PTEST:
33867 return ix86_expand_sse_ptest (d, exp, target);
33868 case FLOAT128_FTYPE_FLOAT128:
33869 case FLOAT_FTYPE_FLOAT:
33870 case INT_FTYPE_INT:
33871 case UINT64_FTYPE_INT:
33872 case UINT16_FTYPE_UINT16:
33873 case INT64_FTYPE_INT64:
33874 case INT64_FTYPE_V4SF:
33875 case INT64_FTYPE_V2DF:
33876 case INT_FTYPE_V16QI:
33877 case INT_FTYPE_V8QI:
33878 case INT_FTYPE_V8SF:
33879 case INT_FTYPE_V4DF:
33880 case INT_FTYPE_V4SF:
33881 case INT_FTYPE_V2DF:
33882 case INT_FTYPE_V32QI:
33883 case V16QI_FTYPE_V16QI:
33884 case V8SI_FTYPE_V8SF:
33885 case V8SI_FTYPE_V4SI:
33886 case V8HI_FTYPE_V8HI:
33887 case V8HI_FTYPE_V16QI:
33888 case V8QI_FTYPE_V8QI:
33889 case V8SF_FTYPE_V8SF:
33890 case V8SF_FTYPE_V8SI:
33891 case V8SF_FTYPE_V4SF:
33892 case V8SF_FTYPE_V8HI:
33893 case V4SI_FTYPE_V4SI:
33894 case V4SI_FTYPE_V16QI:
33895 case V4SI_FTYPE_V4SF:
33896 case V4SI_FTYPE_V8SI:
33897 case V4SI_FTYPE_V8HI:
33898 case V4SI_FTYPE_V4DF:
33899 case V4SI_FTYPE_V2DF:
33900 case V4HI_FTYPE_V4HI:
33901 case V4DF_FTYPE_V4DF:
33902 case V4DF_FTYPE_V4SI:
33903 case V4DF_FTYPE_V4SF:
33904 case V4DF_FTYPE_V2DF:
33905 case V4SF_FTYPE_V4SF:
33906 case V4SF_FTYPE_V4SI:
33907 case V4SF_FTYPE_V8SF:
33908 case V4SF_FTYPE_V4DF:
33909 case V4SF_FTYPE_V8HI:
33910 case V4SF_FTYPE_V2DF:
33911 case V2DI_FTYPE_V2DI:
33912 case V2DI_FTYPE_V16QI:
33913 case V2DI_FTYPE_V8HI:
33914 case V2DI_FTYPE_V4SI:
33915 case V2DF_FTYPE_V2DF:
33916 case V2DF_FTYPE_V4SI:
33917 case V2DF_FTYPE_V4DF:
33918 case V2DF_FTYPE_V4SF:
33919 case V2DF_FTYPE_V2SI:
33920 case V2SI_FTYPE_V2SI:
33921 case V2SI_FTYPE_V4SF:
33922 case V2SI_FTYPE_V2SF:
33923 case V2SI_FTYPE_V2DF:
33924 case V2SF_FTYPE_V2SF:
33925 case V2SF_FTYPE_V2SI:
33926 case V32QI_FTYPE_V32QI:
33927 case V32QI_FTYPE_V16QI:
33928 case V16HI_FTYPE_V16HI:
33929 case V16HI_FTYPE_V8HI:
33930 case V8SI_FTYPE_V8SI:
33931 case V16HI_FTYPE_V16QI:
33932 case V8SI_FTYPE_V16QI:
33933 case V4DI_FTYPE_V16QI:
33934 case V8SI_FTYPE_V8HI:
33935 case V4DI_FTYPE_V8HI:
33936 case V4DI_FTYPE_V4SI:
33937 case V4DI_FTYPE_V2DI:
33938 case HI_FTYPE_HI:
33939 case UINT_FTYPE_V2DF:
33940 case UINT_FTYPE_V4SF:
33941 case UINT64_FTYPE_V2DF:
33942 case UINT64_FTYPE_V4SF:
33943 case V16QI_FTYPE_V8DI:
33944 case V16HI_FTYPE_V16SI:
33945 case V16SI_FTYPE_HI:
33946 case V16SI_FTYPE_V16SI:
33947 case V16SI_FTYPE_INT:
33948 case V16SF_FTYPE_FLOAT:
33949 case V16SF_FTYPE_V4SF:
33950 case V16SF_FTYPE_V16SF:
33951 case V8HI_FTYPE_V8DI:
33952 case V8UHI_FTYPE_V8UHI:
33953 case V8SI_FTYPE_V8DI:
33954 case V8USI_FTYPE_V8USI:
33955 case V8SF_FTYPE_V8DF:
33956 case V8DI_FTYPE_QI:
33957 case V8DI_FTYPE_INT64:
33958 case V8DI_FTYPE_V4DI:
33959 case V8DI_FTYPE_V8DI:
33960 case V8DF_FTYPE_DOUBLE:
33961 case V8DF_FTYPE_V4DF:
33962 case V8DF_FTYPE_V8DF:
33963 case V8DF_FTYPE_V8SI:
33964 nargs = 1;
33965 break;
33966 case V4SF_FTYPE_V4SF_VEC_MERGE:
33967 case V2DF_FTYPE_V2DF_VEC_MERGE:
33968 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
33969 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
33970 case V16QI_FTYPE_V16QI_V16QI:
33971 case V16QI_FTYPE_V8HI_V8HI:
33972 case V16SI_FTYPE_V16SI_V16SI:
33973 case V16SF_FTYPE_V16SF_V16SF:
33974 case V16SF_FTYPE_V16SF_V16SI:
33975 case V8QI_FTYPE_V8QI_V8QI:
33976 case V8QI_FTYPE_V4HI_V4HI:
33977 case V8HI_FTYPE_V8HI_V8HI:
33978 case V8HI_FTYPE_V16QI_V16QI:
33979 case V8HI_FTYPE_V4SI_V4SI:
33980 case V8SF_FTYPE_V8SF_V8SF:
33981 case V8SF_FTYPE_V8SF_V8SI:
33982 case V8DI_FTYPE_V8DI_V8DI:
33983 case V8DF_FTYPE_V8DF_V8DF:
33984 case V8DF_FTYPE_V8DF_V8DI:
33985 case V4SI_FTYPE_V4SI_V4SI:
33986 case V4SI_FTYPE_V8HI_V8HI:
33987 case V4SI_FTYPE_V4SF_V4SF:
33988 case V4SI_FTYPE_V2DF_V2DF:
33989 case V4HI_FTYPE_V4HI_V4HI:
33990 case V4HI_FTYPE_V8QI_V8QI:
33991 case V4HI_FTYPE_V2SI_V2SI:
33992 case V4DF_FTYPE_V4DF_V4DF:
33993 case V4DF_FTYPE_V4DF_V4DI:
33994 case V4SF_FTYPE_V4SF_V4SF:
33995 case V4SF_FTYPE_V4SF_V4SI:
33996 case V4SF_FTYPE_V4SF_V2SI:
33997 case V4SF_FTYPE_V4SF_V2DF:
33998 case V4SF_FTYPE_V4SF_UINT:
33999 case V4SF_FTYPE_V4SF_UINT64:
34000 case V4SF_FTYPE_V4SF_DI:
34001 case V4SF_FTYPE_V4SF_SI:
34002 case V2DI_FTYPE_V2DI_V2DI:
34003 case V2DI_FTYPE_V16QI_V16QI:
34004 case V2DI_FTYPE_V4SI_V4SI:
34005 case V2UDI_FTYPE_V4USI_V4USI:
34006 case V2DI_FTYPE_V2DI_V16QI:
34007 case V2DI_FTYPE_V2DF_V2DF:
34008 case V2SI_FTYPE_V2SI_V2SI:
34009 case V2SI_FTYPE_V4HI_V4HI:
34010 case V2SI_FTYPE_V2SF_V2SF:
34011 case V2DF_FTYPE_V2DF_V2DF:
34012 case V2DF_FTYPE_V2DF_V4SF:
34013 case V2DF_FTYPE_V2DF_V2DI:
34014 case V2DF_FTYPE_V2DF_DI:
34015 case V2DF_FTYPE_V2DF_SI:
34016 case V2DF_FTYPE_V2DF_UINT:
34017 case V2DF_FTYPE_V2DF_UINT64:
34018 case V2SF_FTYPE_V2SF_V2SF:
34019 case V1DI_FTYPE_V1DI_V1DI:
34020 case V1DI_FTYPE_V8QI_V8QI:
34021 case V1DI_FTYPE_V2SI_V2SI:
34022 case V32QI_FTYPE_V16HI_V16HI:
34023 case V16HI_FTYPE_V8SI_V8SI:
34024 case V32QI_FTYPE_V32QI_V32QI:
34025 case V16HI_FTYPE_V32QI_V32QI:
34026 case V16HI_FTYPE_V16HI_V16HI:
34027 case V8SI_FTYPE_V4DF_V4DF:
34028 case V8SI_FTYPE_V8SI_V8SI:
34029 case V8SI_FTYPE_V16HI_V16HI:
34030 case V4DI_FTYPE_V4DI_V4DI:
34031 case V4DI_FTYPE_V8SI_V8SI:
34032 case V4UDI_FTYPE_V8USI_V8USI:
34033 case QI_FTYPE_V8DI_V8DI:
34034 case HI_FTYPE_V16SI_V16SI:
34035 if (comparison == UNKNOWN)
34036 return ix86_expand_binop_builtin (icode, exp, target);
34037 nargs = 2;
34038 break;
34039 case V4SF_FTYPE_V4SF_V4SF_SWAP:
34040 case V2DF_FTYPE_V2DF_V2DF_SWAP:
34041 gcc_assert (comparison != UNKNOWN);
34042 nargs = 2;
34043 swap = true;
34044 break;
34045 case V16HI_FTYPE_V16HI_V8HI_COUNT:
34046 case V16HI_FTYPE_V16HI_SI_COUNT:
34047 case V8SI_FTYPE_V8SI_V4SI_COUNT:
34048 case V8SI_FTYPE_V8SI_SI_COUNT:
34049 case V4DI_FTYPE_V4DI_V2DI_COUNT:
34050 case V4DI_FTYPE_V4DI_INT_COUNT:
34051 case V8HI_FTYPE_V8HI_V8HI_COUNT:
34052 case V8HI_FTYPE_V8HI_SI_COUNT:
34053 case V4SI_FTYPE_V4SI_V4SI_COUNT:
34054 case V4SI_FTYPE_V4SI_SI_COUNT:
34055 case V4HI_FTYPE_V4HI_V4HI_COUNT:
34056 case V4HI_FTYPE_V4HI_SI_COUNT:
34057 case V2DI_FTYPE_V2DI_V2DI_COUNT:
34058 case V2DI_FTYPE_V2DI_SI_COUNT:
34059 case V2SI_FTYPE_V2SI_V2SI_COUNT:
34060 case V2SI_FTYPE_V2SI_SI_COUNT:
34061 case V1DI_FTYPE_V1DI_V1DI_COUNT:
34062 case V1DI_FTYPE_V1DI_SI_COUNT:
34063 nargs = 2;
34064 last_arg_count = true;
34065 break;
34066 case UINT64_FTYPE_UINT64_UINT64:
34067 case UINT_FTYPE_UINT_UINT:
34068 case UINT_FTYPE_UINT_USHORT:
34069 case UINT_FTYPE_UINT_UCHAR:
34070 case UINT16_FTYPE_UINT16_INT:
34071 case UINT8_FTYPE_UINT8_INT:
34072 case HI_FTYPE_HI_HI:
34073 case V16SI_FTYPE_V8DF_V8DF:
34074 nargs = 2;
34075 break;
34076 case V2DI_FTYPE_V2DI_INT_CONVERT:
34077 nargs = 2;
34078 rmode = V1TImode;
34079 nargs_constant = 1;
34080 break;
34081 case V4DI_FTYPE_V4DI_INT_CONVERT:
34082 nargs = 2;
34083 rmode = V2TImode;
34084 nargs_constant = 1;
34085 break;
34086 case V8HI_FTYPE_V8HI_INT:
34087 case V8HI_FTYPE_V8SF_INT:
34088 case V16HI_FTYPE_V16SF_INT:
34089 case V8HI_FTYPE_V4SF_INT:
34090 case V8SF_FTYPE_V8SF_INT:
34091 case V4SF_FTYPE_V16SF_INT:
34092 case V16SF_FTYPE_V16SF_INT:
34093 case V4SI_FTYPE_V4SI_INT:
34094 case V4SI_FTYPE_V8SI_INT:
34095 case V4HI_FTYPE_V4HI_INT:
34096 case V4DF_FTYPE_V4DF_INT:
34097 case V4DF_FTYPE_V8DF_INT:
34098 case V4SF_FTYPE_V4SF_INT:
34099 case V4SF_FTYPE_V8SF_INT:
34100 case V2DI_FTYPE_V2DI_INT:
34101 case V2DF_FTYPE_V2DF_INT:
34102 case V2DF_FTYPE_V4DF_INT:
34103 case V16HI_FTYPE_V16HI_INT:
34104 case V8SI_FTYPE_V8SI_INT:
34105 case V16SI_FTYPE_V16SI_INT:
34106 case V4SI_FTYPE_V16SI_INT:
34107 case V4DI_FTYPE_V4DI_INT:
34108 case V2DI_FTYPE_V4DI_INT:
34109 case V4DI_FTYPE_V8DI_INT:
34110 case HI_FTYPE_HI_INT:
34111 nargs = 2;
34112 nargs_constant = 1;
34113 break;
34114 case V16QI_FTYPE_V16QI_V16QI_V16QI:
34115 case V8SF_FTYPE_V8SF_V8SF_V8SF:
34116 case V4DF_FTYPE_V4DF_V4DF_V4DF:
34117 case V4SF_FTYPE_V4SF_V4SF_V4SF:
34118 case V2DF_FTYPE_V2DF_V2DF_V2DF:
34119 case V32QI_FTYPE_V32QI_V32QI_V32QI:
34120 case HI_FTYPE_V16SI_V16SI_HI:
34121 case QI_FTYPE_V8DI_V8DI_QI:
34122 case V16HI_FTYPE_V16SI_V16HI_HI:
34123 case V16QI_FTYPE_V16SI_V16QI_HI:
34124 case V16QI_FTYPE_V8DI_V16QI_QI:
34125 case V16SF_FTYPE_V16SF_V16SF_HI:
34126 case V16SF_FTYPE_V16SF_V16SF_V16SF:
34127 case V16SF_FTYPE_V16SF_V16SI_V16SF:
34128 case V16SF_FTYPE_V16SI_V16SF_HI:
34129 case V16SF_FTYPE_V16SI_V16SF_V16SF:
34130 case V16SF_FTYPE_V4SF_V16SF_HI:
34131 case V16SI_FTYPE_SI_V16SI_HI:
34132 case V16SI_FTYPE_V16HI_V16SI_HI:
34133 case V16SI_FTYPE_V16QI_V16SI_HI:
34134 case V16SI_FTYPE_V16SF_V16SI_HI:
34135 case V16SI_FTYPE_V16SI_V16SI_HI:
34136 case V16SI_FTYPE_V16SI_V16SI_V16SI:
34137 case V16SI_FTYPE_V4SI_V16SI_HI:
34138 case V2DI_FTYPE_V2DI_V2DI_V2DI:
34139 case V4DI_FTYPE_V4DI_V4DI_V4DI:
34140 case V8DF_FTYPE_V2DF_V8DF_QI:
34141 case V8DF_FTYPE_V4DF_V8DF_QI:
34142 case V8DF_FTYPE_V8DF_V8DF_QI:
34143 case V8DF_FTYPE_V8DF_V8DF_V8DF:
34144 case V8DF_FTYPE_V8DF_V8DI_V8DF:
34145 case V8DF_FTYPE_V8DI_V8DF_V8DF:
34146 case V8DF_FTYPE_V8SF_V8DF_QI:
34147 case V8DF_FTYPE_V8SI_V8DF_QI:
34148 case V8DI_FTYPE_DI_V8DI_QI:
34149 case V8DI_FTYPE_V16QI_V8DI_QI:
34150 case V8DI_FTYPE_V2DI_V8DI_QI:
34151 case V8DI_FTYPE_V4DI_V8DI_QI:
34152 case V8DI_FTYPE_V8DI_V8DI_QI:
34153 case V8DI_FTYPE_V8DI_V8DI_V8DI:
34154 case V8DI_FTYPE_V8HI_V8DI_QI:
34155 case V8DI_FTYPE_V8SI_V8DI_QI:
34156 case V8HI_FTYPE_V8DI_V8HI_QI:
34157 case V8SF_FTYPE_V8DF_V8SF_QI:
34158 case V8SI_FTYPE_V8DF_V8SI_QI:
34159 case V8SI_FTYPE_V8DI_V8SI_QI:
34160 case V4SI_FTYPE_V4SI_V4SI_V4SI:
34161 nargs = 3;
34162 break;
34163 case V32QI_FTYPE_V32QI_V32QI_INT:
34164 case V16HI_FTYPE_V16HI_V16HI_INT:
34165 case V16QI_FTYPE_V16QI_V16QI_INT:
34166 case V4DI_FTYPE_V4DI_V4DI_INT:
34167 case V8HI_FTYPE_V8HI_V8HI_INT:
34168 case V8SI_FTYPE_V8SI_V8SI_INT:
34169 case V8SI_FTYPE_V8SI_V4SI_INT:
34170 case V8SF_FTYPE_V8SF_V8SF_INT:
34171 case V8SF_FTYPE_V8SF_V4SF_INT:
34172 case V4SI_FTYPE_V4SI_V4SI_INT:
34173 case V4DF_FTYPE_V4DF_V4DF_INT:
34174 case V16SF_FTYPE_V16SF_V16SF_INT:
34175 case V16SF_FTYPE_V16SF_V4SF_INT:
34176 case V16SI_FTYPE_V16SI_V4SI_INT:
34177 case V4DF_FTYPE_V4DF_V2DF_INT:
34178 case V4SF_FTYPE_V4SF_V4SF_INT:
34179 case V2DI_FTYPE_V2DI_V2DI_INT:
34180 case V4DI_FTYPE_V4DI_V2DI_INT:
34181 case V2DF_FTYPE_V2DF_V2DF_INT:
34182 case QI_FTYPE_V8DI_V8DI_INT:
34183 case QI_FTYPE_V8DF_V8DF_INT:
34184 case QI_FTYPE_V2DF_V2DF_INT:
34185 case QI_FTYPE_V4SF_V4SF_INT:
34186 case HI_FTYPE_V16SI_V16SI_INT:
34187 case HI_FTYPE_V16SF_V16SF_INT:
34188 nargs = 3;
34189 nargs_constant = 1;
34190 break;
34191 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
34192 nargs = 3;
34193 rmode = V4DImode;
34194 nargs_constant = 1;
34195 break;
34196 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
34197 nargs = 3;
34198 rmode = V2DImode;
34199 nargs_constant = 1;
34200 break;
34201 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
34202 nargs = 3;
34203 rmode = DImode;
34204 nargs_constant = 1;
34205 break;
34206 case V2DI_FTYPE_V2DI_UINT_UINT:
34207 nargs = 3;
34208 nargs_constant = 2;
34209 break;
34210 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI:
34211 case V16SF_FTYPE_V16SF_V16SI_V16SF_HI:
34212 case V16SF_FTYPE_V16SI_V16SF_V16SF_HI:
34213 case V16SI_FTYPE_V16SI_V16SI_V16SI_HI:
34214 case V16SI_FTYPE_V16SI_V4SI_V16SI_HI:
34215 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI:
34216 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI:
34217 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI:
34218 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI:
34219 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI:
34220 case V8DF_FTYPE_V8DF_V8DI_V8DF_QI:
34221 case V8DF_FTYPE_V8DI_V8DF_V8DF_QI:
34222 case V8DI_FTYPE_V16SI_V16SI_V8DI_QI:
34223 case V8DI_FTYPE_V8DI_SI_V8DI_V8DI:
34224 case V8DI_FTYPE_V8DI_V2DI_V8DI_QI:
34225 case V8DI_FTYPE_V8DI_V8DI_V8DI_QI:
34226 nargs = 4;
34227 break;
34228 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
34229 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
34230 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
34231 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
34232 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT:
34233 nargs = 4;
34234 nargs_constant = 1;
34235 break;
34236 case QI_FTYPE_V2DF_V2DF_INT_QI:
34237 case QI_FTYPE_V4SF_V4SF_INT_QI:
34238 nargs = 4;
34239 mask_pos = 1;
34240 nargs_constant = 1;
34241 break;
34242 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
34243 nargs = 4;
34244 nargs_constant = 2;
34245 break;
34246 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
34247 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
34248 nargs = 4;
34249 break;
34250 case QI_FTYPE_V8DI_V8DI_INT_QI:
34251 case HI_FTYPE_V16SI_V16SI_INT_HI:
34252 case QI_FTYPE_V8DF_V8DF_INT_QI:
34253 case HI_FTYPE_V16SF_V16SF_INT_HI:
34254 mask_pos = 1;
34255 nargs = 4;
34256 nargs_constant = 1;
34257 break;
34258 case V8DF_FTYPE_V8DF_INT_V8DF_QI:
34259 case V16SF_FTYPE_V16SF_INT_V16SF_HI:
34260 case V16HI_FTYPE_V16SF_INT_V16HI_HI:
34261 case V16SI_FTYPE_V16SI_INT_V16SI_HI:
34262 case V4SI_FTYPE_V16SI_INT_V4SI_QI:
34263 case V4DI_FTYPE_V8DI_INT_V4DI_QI:
34264 case V4DF_FTYPE_V8DF_INT_V4DF_QI:
34265 case V4SF_FTYPE_V16SF_INT_V4SF_QI:
34266 case V8DI_FTYPE_V8DI_INT_V8DI_QI:
34267 nargs = 4;
34268 mask_pos = 2;
34269 nargs_constant = 1;
34270 break;
34271 case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI:
34272 case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI:
34273 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI:
34274 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI:
34275 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI:
34276 case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI:
34277 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI:
34278 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI:
34279 case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI:
34280 case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI:
34281 nargs = 5;
34282 mask_pos = 2;
34283 nargs_constant = 1;
34284 break;
34285 case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI:
34286 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI:
34287 case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI:
34288 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI:
34289 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI:
34290 nargs = 5;
34291 mask_pos = 1;
34292 nargs_constant = 1;
34293 break;
34295 default:
34296 gcc_unreachable ();
34299 gcc_assert (nargs <= ARRAY_SIZE (args));
34301 if (comparison != UNKNOWN)
34303 gcc_assert (nargs == 2);
34304 return ix86_expand_sse_compare (d, exp, target, swap);
34307 if (rmode == VOIDmode || rmode == tmode)
34309 if (optimize
34310 || target == 0
34311 || GET_MODE (target) != tmode
34312 || !insn_p->operand[0].predicate (target, tmode))
34313 target = gen_reg_rtx (tmode);
34314 real_target = target;
34316 else
34318 real_target = gen_reg_rtx (tmode);
34319 target = simplify_gen_subreg (rmode, real_target, tmode, 0);
34322 for (i = 0; i < nargs; i++)
34324 tree arg = CALL_EXPR_ARG (exp, i);
34325 rtx op = expand_normal (arg);
34326 enum machine_mode mode = insn_p->operand[i + 1].mode;
34327 bool match = insn_p->operand[i + 1].predicate (op, mode);
34329 if (last_arg_count && (i + 1) == nargs)
34331 /* SIMD shift insns take either an 8-bit immediate or a
34332 register as the count. But the builtin functions take an int as
34333 the count. If the count doesn't match, we put it in a register. */
34334 if (!match)
34336 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
34337 if (!insn_p->operand[i + 1].predicate (op, mode))
34338 op = copy_to_reg (op);
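/* The following branch handles the operands that must be compile-time
immediates: with no mask operand (mask_pos == 0) these are the last
nargs_constant arguments; with a mask, mask_pos accounts for the trailing
merge/mask operands that follow the immediate. Values of the wrong width
are diagnosed per insn below. */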
34341 else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
34342 (!mask_pos && (nargs - i) <= nargs_constant))
34344 if (!match)
34345 switch (icode)
34347 case CODE_FOR_avx2_inserti128:
34348 case CODE_FOR_avx2_extracti128:
34349 error ("the last argument must be a 1-bit immediate");
34350 return const0_rtx;
34352 case CODE_FOR_avx512f_cmpv8di3_mask:
34353 case CODE_FOR_avx512f_cmpv16si3_mask:
34354 case CODE_FOR_avx512f_ucmpv8di3_mask:
34355 case CODE_FOR_avx512f_ucmpv16si3_mask:
34356 error ("the last argument must be a 3-bit immediate");
34357 return const0_rtx;
34359 case CODE_FOR_sse4_1_roundsd:
34360 case CODE_FOR_sse4_1_roundss:
34362 case CODE_FOR_sse4_1_roundpd:
34363 case CODE_FOR_sse4_1_roundps:
34364 case CODE_FOR_avx_roundpd256:
34365 case CODE_FOR_avx_roundps256:
34367 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
34368 case CODE_FOR_sse4_1_roundps_sfix:
34369 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
34370 case CODE_FOR_avx_roundps_sfix256:
34372 case CODE_FOR_sse4_1_blendps:
34373 case CODE_FOR_avx_blendpd256:
34374 case CODE_FOR_avx_vpermilv4df:
34375 case CODE_FOR_avx512f_getmantv8df_mask:
34376 case CODE_FOR_avx512f_getmantv16sf_mask:
34377 error ("the last argument must be a 4-bit immediate");
34378 return const0_rtx;
34380 case CODE_FOR_sha1rnds4:
34381 case CODE_FOR_sse4_1_blendpd:
34382 case CODE_FOR_avx_vpermilv2df:
34383 case CODE_FOR_xop_vpermil2v2df3:
34384 case CODE_FOR_xop_vpermil2v4sf3:
34385 case CODE_FOR_xop_vpermil2v4df3:
34386 case CODE_FOR_xop_vpermil2v8sf3:
34387 case CODE_FOR_avx512f_vinsertf32x4_mask:
34388 case CODE_FOR_avx512f_vinserti32x4_mask:
34389 case CODE_FOR_avx512f_vextractf32x4_mask:
34390 case CODE_FOR_avx512f_vextracti32x4_mask:
34391 error ("the last argument must be a 2-bit immediate");
34392 return const0_rtx;
34394 case CODE_FOR_avx_vextractf128v4df:
34395 case CODE_FOR_avx_vextractf128v8sf:
34396 case CODE_FOR_avx_vextractf128v8si:
34397 case CODE_FOR_avx_vinsertf128v4df:
34398 case CODE_FOR_avx_vinsertf128v8sf:
34399 case CODE_FOR_avx_vinsertf128v8si:
34400 case CODE_FOR_avx512f_vinsertf64x4_mask:
34401 case CODE_FOR_avx512f_vinserti64x4_mask:
34402 case CODE_FOR_avx512f_vextractf64x4_mask:
34403 case CODE_FOR_avx512f_vextracti64x4_mask:
34404 error ("the last argument must be a 1-bit immediate");
34405 return const0_rtx;
34407 case CODE_FOR_avx_vmcmpv2df3:
34408 case CODE_FOR_avx_vmcmpv4sf3:
34409 case CODE_FOR_avx_cmpv2df3:
34410 case CODE_FOR_avx_cmpv4sf3:
34411 case CODE_FOR_avx_cmpv4df3:
34412 case CODE_FOR_avx_cmpv8sf3:
34413 case CODE_FOR_avx512f_cmpv8df3_mask:
34414 case CODE_FOR_avx512f_cmpv16sf3_mask:
34415 case CODE_FOR_avx512f_vmcmpv2df3_mask:
34416 case CODE_FOR_avx512f_vmcmpv4sf3_mask:
34417 error ("the last argument must be a 5-bit immediate");
34418 return const0_rtx;
34420 default:
34421 switch (nargs_constant)
34423 case 2:
34424 if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
34425 (!mask_pos && (nargs - i) == nargs_constant))
34427 error ("the next to last argument must be an 8-bit immediate");
34428 break;
34430 case 1:
34431 error ("the last argument must be an 8-bit immediate");
34432 break;
34433 default:
34434 gcc_unreachable ();
34436 return const0_rtx;
34439 else
34441 if (VECTOR_MODE_P (mode))
34442 op = safe_vector_operand (op, mode);
34444 /* If we aren't optimizing, only allow one memory operand to
34445 be generated. */
34446 if (memory_operand (op, mode))
34447 num_memory++;
34449 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34451 if (optimize || !match || num_memory > 1)
34452 op = copy_to_mode_reg (mode, op);
34454 else
34456 op = copy_to_reg (op);
34457 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
34461 args[i].op = op;
34462 args[i].mode = mode;
34465 switch (nargs)
34467 case 1:
34468 pat = GEN_FCN (icode) (real_target, args[0].op);
34469 break;
34470 case 2:
34471 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
34472 break;
34473 case 3:
34474 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34475 args[2].op);
34476 break;
34477 case 4:
34478 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34479 args[2].op, args[3].op);
34480 break;
34481 case 5:
34482 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34483 args[2].op, args[3].op, args[4].op);
break;
34484 case 6:
34485 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34486 args[2].op, args[3].op, args[4].op,
34487 args[5].op);
34488 break;
34489 default:
34490 gcc_unreachable ();
34493 if (! pat)
34494 return 0;
34496 emit_insn (pat);
34497 return target;
34500 /* Transform a pattern of the following layout:
34501 (parallel [
34502 set (A B)
34503 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)])
34505 into:
34506 (set (A B))
Or transform:
34509 (parallel [ A B
...
34511 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
... ])
34514 into:
34515 (parallel [ A B ... ]) */
34517 static rtx
34518 ix86_erase_embedded_rounding (rtx pat)
34520 if (GET_CODE (pat) == INSN)
34521 pat = PATTERN (pat);
34523 gcc_assert (GET_CODE (pat) == PARALLEL);
34525 if (XVECLEN (pat, 0) == 2)
34527 rtx p0 = XVECEXP (pat, 0, 0);
34528 rtx p1 = XVECEXP (pat, 0, 1);
34530 gcc_assert (GET_CODE (p0) == SET
34531 && GET_CODE (p1) == UNSPEC
34532 && XINT (p1, 1) == UNSPEC_EMBEDDED_ROUNDING);
34534 return p0;
34536 else
34538 rtx *res = XALLOCAVEC (rtx, XVECLEN (pat, 0));
34539 int i = 0;
34540 int j = 0;
34542 for (; i < XVECLEN (pat, 0); ++i)
34544 rtx elem = XVECEXP (pat, 0, i);
34545 if (GET_CODE (elem) != UNSPEC
34546 || XINT (elem, 1) != UNSPEC_EMBEDDED_ROUNDING)
34547 res [j++] = elem;
34550 /* No more than 1 occurrence was removed. */
34551 gcc_assert (j >= XVECLEN (pat, 0) - 1);
34553 return gen_rtx_PARALLEL (GET_MODE (pat), gen_rtvec_v (j, res));
34557 /* Subroutine of ix86_expand_round_builtin to take care of comi insns
34558 with rounding. */
34559 static rtx
34560 ix86_expand_sse_comi_round (const struct builtin_description *d,
34561 tree exp, rtx target)
34563 rtx pat, set_dst;
34564 tree arg0 = CALL_EXPR_ARG (exp, 0);
34565 tree arg1 = CALL_EXPR_ARG (exp, 1);
34566 tree arg2 = CALL_EXPR_ARG (exp, 2);
34567 tree arg3 = CALL_EXPR_ARG (exp, 3);
34568 rtx op0 = expand_normal (arg0);
34569 rtx op1 = expand_normal (arg1);
34570 rtx op2 = expand_normal (arg2);
34571 rtx op3 = expand_normal (arg3);
34572 enum insn_code icode = d->icode;
34573 const struct insn_data_d *insn_p = &insn_data[icode];
34574 enum machine_mode mode0 = insn_p->operand[0].mode;
34575 enum machine_mode mode1 = insn_p->operand[1].mode;
34576 enum rtx_code comparison = UNEQ;
34577 bool need_ucomi = false;
34579 /* See avxintrin.h for values. */
34580 enum rtx_code comi_comparisons[32] =
34582 UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
34583 UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
34584 UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
34586 bool need_ucomi_values[32] =
34588 true, false, false, true, true, false, false, true,
34589 true, false, false, true, true, false, false, true,
34590 false, true, true, false, false, true, true, false,
34591 false, true, true, false, false, true, true, false
34594 if (!CONST_INT_P (op2))
34596 error ("the third argument must be a comparison constant");
34597 return const0_rtx;
34599 if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
34601 error ("incorrect comparison mode");
34602 return const0_rtx;
34605 if (!insn_p->operand[2].predicate (op3, SImode))
34607 error ("incorrect rounding operand");
34608 return const0_rtx;
34611 comparison = comi_comparisons[INTVAL (op2)];
34612 need_ucomi = need_ucomi_values[INTVAL (op2)];
34614 if (VECTOR_MODE_P (mode0))
34615 op0 = safe_vector_operand (op0, mode0);
34616 if (VECTOR_MODE_P (mode1))
34617 op1 = safe_vector_operand (op1, mode1);
34619 target = gen_reg_rtx (SImode);
34620 emit_move_insn (target, const0_rtx);
34621 target = gen_rtx_SUBREG (QImode, target, 0);
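/* The comparison flag is written into the low byte of this zeroed SImode
register via STRICT_LOW_PART below, so SUBREG_REG (target) yields the
zero-extended int result. */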
34623 if ((optimize && !register_operand (op0, mode0))
34624 || !insn_p->operand[0].predicate (op0, mode0))
34625 op0 = copy_to_mode_reg (mode0, op0);
34626 if ((optimize && !register_operand (op1, mode1))
34627 || !insn_p->operand[1].predicate (op1, mode1))
34628 op1 = copy_to_mode_reg (mode1, op1);
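/* Quiet (non-signaling) predicates use the UCOMI patterns, which do not
raise the invalid exception on quiet NaNs; signaling predicates keep the
COMI patterns. */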
34630 if (need_ucomi)
34631 icode = icode == CODE_FOR_sse_comi_round
34632 ? CODE_FOR_sse_ucomi_round
34633 : CODE_FOR_sse2_ucomi_round;
34635 pat = GEN_FCN (icode) (op0, op1, op3);
34636 if (! pat)
34637 return 0;
34639 /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point. */
34640 if (INTVAL (op3) == NO_ROUND)
34642 pat = ix86_erase_embedded_rounding (pat);
34643 if (! pat)
34644 return 0;
34646 set_dst = SET_DEST (pat);
34648 else
34650 gcc_assert (GET_CODE (XVECEXP (pat, 0, 0)) == SET);
34651 set_dst = SET_DEST (XVECEXP (pat, 0, 0));
34654 emit_insn (pat);
34655 emit_insn (gen_rtx_SET (VOIDmode,
34656 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
34657 gen_rtx_fmt_ee (comparison, QImode,
34658 set_dst,
34659 const0_rtx)));
34661 return SUBREG_REG (target);
34664 static rtx
34665 ix86_expand_round_builtin (const struct builtin_description *d,
34666 tree exp, rtx target)
34668 rtx pat;
34669 unsigned int i, nargs;
34670 struct
34672 rtx op;
34673 enum machine_mode mode;
34674 } args[6];
34675 enum insn_code icode = d->icode;
34676 const struct insn_data_d *insn_p = &insn_data[icode];
34677 enum machine_mode tmode = insn_p->operand[0].mode;
34678 unsigned int nargs_constant = 0;
34679 unsigned int redundant_embed_rnd = 0;
34681 switch ((enum ix86_builtin_func_type) d->flag)
34683 case UINT64_FTYPE_V2DF_INT:
34684 case UINT64_FTYPE_V4SF_INT:
34685 case UINT_FTYPE_V2DF_INT:
34686 case UINT_FTYPE_V4SF_INT:
34687 case INT64_FTYPE_V2DF_INT:
34688 case INT64_FTYPE_V4SF_INT:
34689 case INT_FTYPE_V2DF_INT:
34690 case INT_FTYPE_V4SF_INT:
34691 nargs = 2;
34692 break;
34693 case V4SF_FTYPE_V4SF_UINT_INT:
34694 case V4SF_FTYPE_V4SF_UINT64_INT:
34695 case V2DF_FTYPE_V2DF_UINT64_INT:
34696 case V4SF_FTYPE_V4SF_INT_INT:
34697 case V4SF_FTYPE_V4SF_INT64_INT:
34698 case V2DF_FTYPE_V2DF_INT64_INT:
34699 case V4SF_FTYPE_V4SF_V4SF_INT:
34700 case V2DF_FTYPE_V2DF_V2DF_INT:
34701 case V4SF_FTYPE_V4SF_V2DF_INT:
34702 case V2DF_FTYPE_V2DF_V4SF_INT:
34703 nargs = 3;
34704 break;
34705 case V8SF_FTYPE_V8DF_V8SF_QI_INT:
34706 case V8DF_FTYPE_V8DF_V8DF_QI_INT:
34707 case V8SI_FTYPE_V8DF_V8SI_QI_INT:
34708 case V16SF_FTYPE_V16SF_V16SF_HI_INT:
34709 case V16SF_FTYPE_V16SI_V16SF_HI_INT:
34710 case V16SI_FTYPE_V16SF_V16SI_HI_INT:
34711 case V8DF_FTYPE_V8SF_V8DF_QI_INT:
34712 case V16SF_FTYPE_V16HI_V16SF_HI_INT:
34713 case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
34714 case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
34715 nargs = 4;
34716 break;
34717 case V4SF_FTYPE_V4SF_V4SF_INT_INT:
34718 case V2DF_FTYPE_V2DF_V2DF_INT_INT:
34719 nargs_constant = 2;
34720 nargs = 4;
34721 break;
34722 case INT_FTYPE_V4SF_V4SF_INT_INT:
34723 case INT_FTYPE_V2DF_V2DF_INT_INT:
34724 return ix86_expand_sse_comi_round (d, exp, target);
34725 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT:
34726 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
34727 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
34728 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
34729 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
34730 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
34731 nargs = 5;
34732 break;
34733 case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
34734 case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
34735 nargs_constant = 4;
34736 nargs = 5;
34737 break;
34738 case QI_FTYPE_V8DF_V8DF_INT_QI_INT:
34739 case QI_FTYPE_V2DF_V2DF_INT_QI_INT:
34740 case HI_FTYPE_V16SF_V16SF_INT_HI_INT:
34741 case QI_FTYPE_V4SF_V4SF_INT_QI_INT:
34742 nargs_constant = 3;
34743 nargs = 5;
34744 break;
34745 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
34746 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
34747 nargs = 6;
34748 nargs_constant = 4;
34749 break;
34750 case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT:
34751 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT:
34752 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT:
34753 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT:
34754 nargs = 6;
34755 nargs_constant = 3;
34756 break;
34757 default:
34758 gcc_unreachable ();
34760 gcc_assert (nargs <= ARRAY_SIZE (args));
34762 if (optimize
34763 || target == 0
34764 || GET_MODE (target) != tmode
34765 || !insn_p->operand[0].predicate (target, tmode))
34766 target = gen_reg_rtx (tmode);
34768 for (i = 0; i < nargs; i++)
34770 tree arg = CALL_EXPR_ARG (exp, i);
34771 rtx op = expand_normal (arg);
34772 enum machine_mode mode = insn_p->operand[i + 1].mode;
34773 bool match = insn_p->operand[i + 1].predicate (op, mode);
34775 if (i == nargs - nargs_constant)
34777 if (!match)
34779 switch (icode)
34781 case CODE_FOR_avx512f_getmantv8df_mask_round:
34782 case CODE_FOR_avx512f_getmantv16sf_mask_round:
34783 case CODE_FOR_avx512f_getmantv2df_round:
34784 case CODE_FOR_avx512f_getmantv4sf_round:
34785 error ("the immediate argument must be a 4-bit immediate");
34786 return const0_rtx;
34787 case CODE_FOR_avx512f_cmpv8df3_mask_round:
34788 case CODE_FOR_avx512f_cmpv16sf3_mask_round:
34789 case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
34790 case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
34791 error ("the immediate argument must be a 5-bit immediate");
34792 return const0_rtx;
34793 default:
34794 error ("the immediate argument must be an 8-bit immediate");
34795 return const0_rtx;
34799 else if (i == nargs - 1)
34801 if (!insn_p->operand[nargs].predicate (op, SImode))
34803 error ("incorrect rounding operand");
34804 return const0_rtx;
34807 /* If there is no rounding, use the normal version of the pattern. */
34808 if (INTVAL (op) == NO_ROUND)
34809 redundant_embed_rnd = 1;
34811 else
34813 if (VECTOR_MODE_P (mode))
34814 op = safe_vector_operand (op, mode);
34816 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34818 if (optimize || !match)
34819 op = copy_to_mode_reg (mode, op);
34821 else
34823 op = copy_to_reg (op);
34824 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
34828 args[i].op = op;
34829 args[i].mode = mode;
34832 switch (nargs)
34834 case 1:
34835 pat = GEN_FCN (icode) (target, args[0].op);
34836 break;
34837 case 2:
34838 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
34839 break;
34840 case 3:
34841 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34842 args[2].op);
34843 break;
34844 case 4:
34845 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34846 args[2].op, args[3].op);
34847 break;
34848 case 5:
34849 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34850 args[2].op, args[3].op, args[4].op);
break;
34851 case 6:
34852 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34853 args[2].op, args[3].op, args[4].op,
34854 args[5].op);
34855 break;
34856 default:
34857 gcc_unreachable ();
34860 if (!pat)
34861 return 0;
34863 if (redundant_embed_rnd)
34864 pat = ix86_erase_embedded_rounding (pat);
34866 emit_insn (pat);
34867 return target;
34870 /* Subroutine of ix86_expand_builtin to take care of special insns
34871 with variable number of operands. */
34873 static rtx
34874 ix86_expand_special_args_builtin (const struct builtin_description *d,
34875 tree exp, rtx target)
34877 tree arg;
34878 rtx pat, op;
34879 unsigned int i, nargs, arg_adjust, memory;
34880 bool aligned_mem = false;
34881 struct
34883 rtx op;
34884 enum machine_mode mode;
34885 } args[3];
34886 enum insn_code icode = d->icode;
34887 bool last_arg_constant = false;
34888 const struct insn_data_d *insn_p = &insn_data[icode];
34889 enum machine_mode tmode = insn_p->operand[0].mode;
34890 enum { load, store } klass;
34892 switch ((enum ix86_builtin_func_type) d->flag)
34894 case VOID_FTYPE_VOID:
34895 emit_insn (GEN_FCN (icode) (target));
34896 return 0;
34897 case VOID_FTYPE_UINT64:
34898 case VOID_FTYPE_UNSIGNED:
34899 nargs = 0;
34900 klass = store;
34901 memory = 0;
34902 break;
34904 case INT_FTYPE_VOID:
34905 case UINT64_FTYPE_VOID:
34906 case UNSIGNED_FTYPE_VOID:
34907 nargs = 0;
34908 klass = load;
34909 memory = 0;
34910 break;
34911 case UINT64_FTYPE_PUNSIGNED:
34912 case V2DI_FTYPE_PV2DI:
34913 case V4DI_FTYPE_PV4DI:
34914 case V32QI_FTYPE_PCCHAR:
34915 case V16QI_FTYPE_PCCHAR:
34916 case V8SF_FTYPE_PCV4SF:
34917 case V8SF_FTYPE_PCFLOAT:
34918 case V4SF_FTYPE_PCFLOAT:
34919 case V4DF_FTYPE_PCV2DF:
34920 case V4DF_FTYPE_PCDOUBLE:
34921 case V2DF_FTYPE_PCDOUBLE:
34922 case VOID_FTYPE_PVOID:
34923 case V16SI_FTYPE_PV4SI:
34924 case V16SF_FTYPE_PV4SF:
34925 case V8DI_FTYPE_PV4DI:
34926 case V8DI_FTYPE_PV8DI:
34927 case V8DF_FTYPE_PV4DF:
34928 nargs = 1;
34929 klass = load;
34930 memory = 0;
34931 switch (icode)
34933 case CODE_FOR_sse4_1_movntdqa:
34934 case CODE_FOR_avx2_movntdqa:
34935 case CODE_FOR_avx512f_movntdqa:
34936 aligned_mem = true;
34937 break;
34938 default:
34939 break;
34941 break;
34942 case VOID_FTYPE_PV2SF_V4SF:
34943 case VOID_FTYPE_PV8DI_V8DI:
34944 case VOID_FTYPE_PV4DI_V4DI:
34945 case VOID_FTYPE_PV2DI_V2DI:
34946 case VOID_FTYPE_PCHAR_V32QI:
34947 case VOID_FTYPE_PCHAR_V16QI:
34948 case VOID_FTYPE_PFLOAT_V16SF:
34949 case VOID_FTYPE_PFLOAT_V8SF:
34950 case VOID_FTYPE_PFLOAT_V4SF:
34951 case VOID_FTYPE_PDOUBLE_V8DF:
34952 case VOID_FTYPE_PDOUBLE_V4DF:
34953 case VOID_FTYPE_PDOUBLE_V2DF:
34954 case VOID_FTYPE_PLONGLONG_LONGLONG:
34955 case VOID_FTYPE_PULONGLONG_ULONGLONG:
34956 case VOID_FTYPE_PINT_INT:
34957 nargs = 1;
34958 klass = store;
34959 /* Reserve memory operand for target. */
34960 memory = ARRAY_SIZE (args);
34961 switch (icode)
34963 /* These builtins and instructions require the memory
34964 to be properly aligned. */
34965 case CODE_FOR_avx_movntv4di:
34966 case CODE_FOR_sse2_movntv2di:
34967 case CODE_FOR_avx_movntv8sf:
34968 case CODE_FOR_sse_movntv4sf:
34969 case CODE_FOR_sse4a_vmmovntv4sf:
34970 case CODE_FOR_avx_movntv4df:
34971 case CODE_FOR_sse2_movntv2df:
34972 case CODE_FOR_sse4a_vmmovntv2df:
34973 case CODE_FOR_sse2_movntidi:
34974 case CODE_FOR_sse_movntq:
34975 case CODE_FOR_sse2_movntisi:
34976 case CODE_FOR_avx512f_movntv16sf:
34977 case CODE_FOR_avx512f_movntv8df:
34978 case CODE_FOR_avx512f_movntv8di:
34979 aligned_mem = true;
34980 break;
34981 default:
34982 break;
34984 break;
34985 case V4SF_FTYPE_V4SF_PCV2SF:
34986 case V2DF_FTYPE_V2DF_PCDOUBLE:
34987 nargs = 2;
34988 klass = load;
34989 memory = 1;
34990 break;
34991 case V8SF_FTYPE_PCV8SF_V8SI:
34992 case V4DF_FTYPE_PCV4DF_V4DI:
34993 case V4SF_FTYPE_PCV4SF_V4SI:
34994 case V2DF_FTYPE_PCV2DF_V2DI:
34995 case V8SI_FTYPE_PCV8SI_V8SI:
34996 case V4DI_FTYPE_PCV4DI_V4DI:
34997 case V4SI_FTYPE_PCV4SI_V4SI:
34998 case V2DI_FTYPE_PCV2DI_V2DI:
34999 nargs = 2;
35000 klass = load;
35001 memory = 0;
35002 break;
35003 case VOID_FTYPE_PV8DF_V8DF_QI:
35004 case VOID_FTYPE_PV16SF_V16SF_HI:
35005 case VOID_FTYPE_PV8DI_V8DI_QI:
35006 case VOID_FTYPE_PV16SI_V16SI_HI:
35007 switch (icode)
35009 /* These builtins and instructions require the memory
35010 to be properly aligned. */
35011 case CODE_FOR_avx512f_storev16sf_mask:
35012 case CODE_FOR_avx512f_storev16si_mask:
35013 case CODE_FOR_avx512f_storev8df_mask:
35014 case CODE_FOR_avx512f_storev8di_mask:
35015 aligned_mem = true;
35016 break;
35017 default:
35018 break;
35020 /* FALLTHRU */
35021 case VOID_FTYPE_PV8SF_V8SI_V8SF:
35022 case VOID_FTYPE_PV4DF_V4DI_V4DF:
35023 case VOID_FTYPE_PV4SF_V4SI_V4SF:
35024 case VOID_FTYPE_PV2DF_V2DI_V2DF:
35025 case VOID_FTYPE_PV8SI_V8SI_V8SI:
35026 case VOID_FTYPE_PV4DI_V4DI_V4DI:
35027 case VOID_FTYPE_PV4SI_V4SI_V4SI:
35028 case VOID_FTYPE_PV2DI_V2DI_V2DI:
35029 case VOID_FTYPE_PDOUBLE_V2DF_QI:
35030 case VOID_FTYPE_PFLOAT_V4SF_QI:
35031 case VOID_FTYPE_PV8SI_V8DI_QI:
35032 case VOID_FTYPE_PV8HI_V8DI_QI:
35033 case VOID_FTYPE_PV16HI_V16SI_HI:
35034 case VOID_FTYPE_PV16QI_V8DI_QI:
35035 case VOID_FTYPE_PV16QI_V16SI_HI:
35036 nargs = 2;
35037 klass = store;
35038 /* Reserve memory operand for target. */
35039 memory = ARRAY_SIZE (args);
35040 break;
35041 case V16SF_FTYPE_PCV16SF_V16SF_HI:
35042 case V16SI_FTYPE_PCV16SI_V16SI_HI:
35043 case V8DF_FTYPE_PCV8DF_V8DF_QI:
35044 case V8DI_FTYPE_PCV8DI_V8DI_QI:
35045 case V2DF_FTYPE_PCDOUBLE_V2DF_QI:
35046 case V4SF_FTYPE_PCFLOAT_V4SF_QI:
35047 nargs = 3;
35048 klass = load;
35049 memory = 0;
35050 switch (icode)
35052 /* These builtins and instructions require the memory
35053 to be properly aligned. */
35054 case CODE_FOR_avx512f_loadv16sf_mask:
35055 case CODE_FOR_avx512f_loadv16si_mask:
35056 case CODE_FOR_avx512f_loadv8df_mask:
35057 case CODE_FOR_avx512f_loadv8di_mask:
35058 aligned_mem = true;
35059 break;
35060 default:
35061 break;
35063 break;
35064 case VOID_FTYPE_UINT_UINT_UINT:
35065 case VOID_FTYPE_UINT64_UINT_UINT:
35066 case UCHAR_FTYPE_UINT_UINT_UINT:
35067 case UCHAR_FTYPE_UINT64_UINT_UINT:
35068 nargs = 3;
35069 klass = load;
35070 memory = ARRAY_SIZE (args);
35071 last_arg_constant = true;
35072 break;
35073 default:
35074 gcc_unreachable ();
35077 gcc_assert (nargs <= ARRAY_SIZE (args));
35079 if (klass == store)
35081 arg = CALL_EXPR_ARG (exp, 0);
35082 op = expand_normal (arg);
35083 gcc_assert (target == 0);
35084 if (memory)
35086 op = ix86_zero_extend_to_Pmode (op);
35087 target = gen_rtx_MEM (tmode, op);
35088 /* target at this point has just BITS_PER_UNIT MEM_ALIGN
35089 on it. Try to improve it using get_pointer_alignment,
35090 and if the special builtin is one that requires strict
35091 mode alignment, also from its GET_MODE_ALIGNMENT.
35092 Failure to do so could lead to ix86_legitimate_combined_insn
35093 rejecting all changes to such insns. */
35094 unsigned int align = get_pointer_alignment (arg);
35095 if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
35096 align = GET_MODE_ALIGNMENT (tmode);
35097 if (MEM_ALIGN (target) < align)
35098 set_mem_align (target, align);
35100 else
35101 target = force_reg (tmode, op);
35102 arg_adjust = 1;
35104 else
35106 arg_adjust = 0;
35107 if (optimize
35108 || target == 0
35109 || !register_operand (target, tmode)
35110 || GET_MODE (target) != tmode)
35111 target = gen_reg_rtx (tmode);
35114 for (i = 0; i < nargs; i++)
35116 enum machine_mode mode = insn_p->operand[i + 1].mode;
35117 bool match;
35119 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
35120 op = expand_normal (arg);
35121 match = insn_p->operand[i + 1].predicate (op, mode);
35123 if (last_arg_constant && (i + 1) == nargs)
35125 if (!match)
35127 if (icode == CODE_FOR_lwp_lwpvalsi3
35128 || icode == CODE_FOR_lwp_lwpinssi3
35129 || icode == CODE_FOR_lwp_lwpvaldi3
35130 || icode == CODE_FOR_lwp_lwpinsdi3)
35131 error ("the last argument must be a 32-bit immediate");
35132 else
35133 error ("the last argument must be an 8-bit immediate");
35134 return const0_rtx;
35137 else
35139 if (i == memory)
35141 /* This must be the memory operand. */
35142 op = ix86_zero_extend_to_Pmode (op);
35143 op = gen_rtx_MEM (mode, op);
35144 /* op at this point has just BITS_PER_UNIT MEM_ALIGN
35145 on it. Try to improve it using get_pointer_alignment,
35146 and if the special builtin is one that requires strict
35147 mode alignment, also from its GET_MODE_ALIGNMENT.
35148 Failure to do so could lead to ix86_legitimate_combined_insn
35149 rejecting all changes to such insns. */
35150 unsigned int align = get_pointer_alignment (arg);
35151 if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
35152 align = GET_MODE_ALIGNMENT (mode);
35153 if (MEM_ALIGN (op) < align)
35154 set_mem_align (op, align);
35156 else
35158 /* This must be a register. */
35159 if (VECTOR_MODE_P (mode))
35160 op = safe_vector_operand (op, mode);
35162 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
35163 op = copy_to_mode_reg (mode, op);
35164 else
35166 op = copy_to_reg (op);
35167 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
35172 args[i].op = op;
35173 args[i].mode = mode;
35176 switch (nargs)
35178 case 0:
35179 pat = GEN_FCN (icode) (target);
35180 break;
35181 case 1:
35182 pat = GEN_FCN (icode) (target, args[0].op);
35183 break;
35184 case 2:
35185 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
35186 break;
35187 case 3:
35188 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
35189 break;
35190 default:
35191 gcc_unreachable ();
35194 if (! pat)
35195 return 0;
35196 emit_insn (pat);
35197 return klass == store ? 0 : target;
35200 /* Return the integer constant in ARG. Constrain it to be in the range
35201 of the subparts of VEC_TYPE; issue an error if not. */
35203 static int
35204 get_element_number (tree vec_type, tree arg)
35206 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
35208 if (!tree_fits_uhwi_p (arg)
35209 || (elt = tree_to_uhwi (arg), elt > max))
35211 error ("selector must be an integer constant in the range 0..%wi", max);
35212 return 0;
35215 return elt;
35218 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
35219 ix86_expand_vector_init. We DO have language-level syntax for this, in
35220 the form of (type){ init-list }. Except that since we can't place emms
35221 instructions from inside the compiler, we can't allow the use of MMX
35222 registers unless the user explicitly asks for it. So we do *not* define
35223 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
35224 we have builtins invoked by mmintrin.h that give us license to emit
35225 these sorts of instructions. */
35227 static rtx
35228 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
35230 enum machine_mode tmode = TYPE_MODE (type);
35231 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
35232 int i, n_elt = GET_MODE_NUNITS (tmode);
35233 rtvec v = rtvec_alloc (n_elt);
35235 gcc_assert (VECTOR_MODE_P (tmode));
35236 gcc_assert (call_expr_nargs (exp) == n_elt);
35238 for (i = 0; i < n_elt; ++i)
35240 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
35241 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
35244 if (!target || !register_operand (target, tmode))
35245 target = gen_reg_rtx (tmode);
35247 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
35248 return target;
35251 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
35252 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
35253 had a language-level syntax for referencing vector elements. */
35255 static rtx
35256 ix86_expand_vec_ext_builtin (tree exp, rtx target)
35258 enum machine_mode tmode, mode0;
35259 tree arg0, arg1;
35260 int elt;
35261 rtx op0;
35263 arg0 = CALL_EXPR_ARG (exp, 0);
35264 arg1 = CALL_EXPR_ARG (exp, 1);
35266 op0 = expand_normal (arg0);
35267 elt = get_element_number (TREE_TYPE (arg0), arg1);
35269 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
35270 mode0 = TYPE_MODE (TREE_TYPE (arg0));
35271 gcc_assert (VECTOR_MODE_P (mode0));
35273 op0 = force_reg (mode0, op0);
35275 if (optimize || !target || !register_operand (target, tmode))
35276 target = gen_reg_rtx (tmode);
35278 ix86_expand_vector_extract (true, target, op0, elt);
35280 return target;
35283 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
35284 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
35285 a language-level syntax for referencing vector elements. */
35287 static rtx
35288 ix86_expand_vec_set_builtin (tree exp)
35290 enum machine_mode tmode, mode1;
35291 tree arg0, arg1, arg2;
35292 int elt;
35293 rtx op0, op1, target;
35295 arg0 = CALL_EXPR_ARG (exp, 0);
35296 arg1 = CALL_EXPR_ARG (exp, 1);
35297 arg2 = CALL_EXPR_ARG (exp, 2);
35299 tmode = TYPE_MODE (TREE_TYPE (arg0));
35300 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
35301 gcc_assert (VECTOR_MODE_P (tmode));
35303 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
35304 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
35305 elt = get_element_number (TREE_TYPE (arg0), arg2);
35307 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
35308 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
35310 op0 = force_reg (tmode, op0);
35311 op1 = force_reg (mode1, op1);
35313 /* OP0 is the source of these builtin functions and shouldn't be
35314 modified. Create a copy, use it and return it as target. */
35315 target = gen_reg_rtx (tmode);
35316 emit_move_insn (target, op0);
35317 ix86_expand_vector_set (true, target, op1, elt);
35319 return target;
35322 /* Choose max of DST and SRC and put it to DST. */
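/* The comparison is unsigned (LTU/GEU). MPX bound intersection uses this
for both ends: directly for the lower bound, and for the upper bound
because UB is kept in one's complement form. */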
35323 static void
35324 ix86_emit_move_max (rtx dst, rtx src)
35326 rtx t;
35328 if (TARGET_CMOVE)
35330 t = ix86_expand_compare (LTU, dst, src);
35331 emit_insn (gen_rtx_SET (VOIDmode, dst,
35332 gen_rtx_IF_THEN_ELSE (GET_MODE (dst), t,
35333 src, dst)));
35335 else
35337 rtx nomove = gen_label_rtx ();
35338 emit_cmp_and_jump_insns (dst, src, GEU, const0_rtx,
35339 GET_MODE (dst), 1, nomove);
35340 emit_insn (gen_rtx_SET (VOIDmode, dst, src));
35341 emit_label (nomove);
35345 /* Expand an expression EXP that calls a built-in function,
35346 with result going to TARGET if that's convenient
35347 (and in mode MODE if that's convenient).
35348 SUBTARGET may be used as the target for computing one of EXP's operands.
35349 IGNORE is nonzero if the value is to be ignored. */
35351 static rtx
35352 ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
35353 enum machine_mode mode, int ignore)
35355 const struct builtin_description *d;
35356 size_t i;
35357 enum insn_code icode;
35358 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
35359 tree arg0, arg1, arg2, arg3, arg4;
35360 rtx op0, op1, op2, op3, op4, pat, insn;
35361 enum machine_mode mode0, mode1, mode2, mode3, mode4;
35362 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
35364 /* For CPU builtins that can be folded, fold first and expand the fold. */
35365 switch (fcode)
35367 case IX86_BUILTIN_CPU_INIT:
35369 /* Make it call __cpu_indicator_init in libgcc. */
35370 tree call_expr, fndecl, type;
35371 type = build_function_type_list (integer_type_node, NULL_TREE);
35372 fndecl = build_fn_decl ("__cpu_indicator_init", type);
35373 call_expr = build_call_expr (fndecl, 0);
35374 return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
35376 case IX86_BUILTIN_CPU_IS:
35377 case IX86_BUILTIN_CPU_SUPPORTS:
35379 tree arg0 = CALL_EXPR_ARG (exp, 0);
35380 tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
35381 gcc_assert (fold_expr != NULL_TREE);
35382 return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
35386 /* Determine whether the builtin function is available under the current ISA.
35387 Originally the builtin was not created if it wasn't applicable to the
35388 current ISA based on the command line switches. With function specific
35389 options, we need to check in the context of the function making the call
35390 whether it is supported. */
35391 if (ix86_builtins_isa[fcode].isa
35392 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
35394 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
35395 NULL, (enum fpmath_unit) 0, false);
35397 if (!opts)
35398 error ("%qE needs unknown isa option", fndecl);
35399 else
35401 gcc_assert (opts != NULL);
35402 error ("%qE needs isa option %s", fndecl, opts);
35403 free (opts);
35405 return const0_rtx;
35408 switch (fcode)
35410 case IX86_BUILTIN_BNDMK:
35411 arg0 = CALL_EXPR_ARG (exp, 0);
35412 arg1 = CALL_EXPR_ARG (exp, 1);
35414 /* Builtin arg1 is the size of the block, but instruction op1 should
35415 be (size - 1). */
35416 op0 = expand_normal (arg0);
35417 op1 = expand_normal (fold_build2 (PLUS_EXPR, TREE_TYPE (arg1),
35418 arg1, integer_minus_one_node));
35419 op0 = force_reg (Pmode, op0);
35420 op1 = force_reg (Pmode, op1);
35422 emit_insn (TARGET_64BIT
35423 ? gen_bnd64_mk (target, op0, op1)
35424 : gen_bnd32_mk (target, op0, op1));
35425 return target;
35427 case IX86_BUILTIN_BNDSTX:
35428 arg0 = CALL_EXPR_ARG (exp, 0);
35429 arg1 = CALL_EXPR_ARG (exp, 1);
35430 arg2 = CALL_EXPR_ARG (exp, 2);
35432 op0 = expand_normal (arg0);
35433 op1 = expand_normal (arg1);
35434 op2 = expand_normal (arg2);
35436 op0 = force_reg (Pmode, op0);
35437 op1 = force_reg (BNDmode, op1);
35438 op2 = force_reg (Pmode, op2);
35440 emit_insn (TARGET_64BIT
35441 ? gen_bnd64_stx (op2, op0, op1)
35442 : gen_bnd32_stx (op2, op0, op1));
35443 return 0;
35445 case IX86_BUILTIN_BNDLDX:
35446 arg0 = CALL_EXPR_ARG (exp, 0);
35447 arg1 = CALL_EXPR_ARG (exp, 1);
35449 op0 = expand_normal (arg0);
35450 op1 = expand_normal (arg1);
35452 op0 = force_reg (Pmode, op0);
35453 op1 = force_reg (Pmode, op1);
35455 /* Avoid registers which cannot be used as an index. */
35456 if (!index_register_operand (op1, Pmode))
35458 rtx temp = gen_reg_rtx (Pmode);
35459 emit_move_insn (temp, op1);
35460 op1 = temp;
35463 /* If op1 was originally a register then it may have a
35464 mode other than Pmode. We need to extend it in that
35465 case because bndldx may work only with Pmode regs. */
35466 if (GET_MODE (op1) != Pmode)
35467 op1 = ix86_zero_extend_to_Pmode (op1);
35469 if (REG_P (target))
35470 emit_insn (TARGET_64BIT
35471 ? gen_bnd64_ldx (target, op0, op1)
35472 : gen_bnd32_ldx (target, op0, op1));
35473 else
35475 rtx temp = gen_reg_rtx (BNDmode);
35476 emit_insn (TARGET_64BIT
35477 ? gen_bnd64_ldx (temp, op0, op1)
35478 : gen_bnd32_ldx (temp, op0, op1));
35479 emit_move_insn (target, temp);
35481 return target;
35483 case IX86_BUILTIN_BNDCL:
35484 arg0 = CALL_EXPR_ARG (exp, 0);
35485 arg1 = CALL_EXPR_ARG (exp, 1);
35487 op0 = expand_normal (arg0);
35488 op1 = expand_normal (arg1);
35490 op0 = force_reg (Pmode, op0);
35491 op1 = force_reg (BNDmode, op1);
35493 emit_insn (TARGET_64BIT
35494 ? gen_bnd64_cl (op1, op0)
35495 : gen_bnd32_cl (op1, op0));
35496 return 0;
35498 case IX86_BUILTIN_BNDCU:
35499 arg0 = CALL_EXPR_ARG (exp, 0);
35500 arg1 = CALL_EXPR_ARG (exp, 1);
35502 op0 = expand_normal (arg0);
35503 op1 = expand_normal (arg1);
35505 op0 = force_reg (Pmode, op0);
35506 op1 = force_reg (BNDmode, op1);
35508 emit_insn (TARGET_64BIT
35509 ? gen_bnd64_cu (op1, op0)
35510 : gen_bnd32_cu (op1, op0));
35511 return 0;
35513 case IX86_BUILTIN_BNDRET:
35514 arg0 = CALL_EXPR_ARG (exp, 0);
35515 gcc_assert (TREE_CODE (arg0) == SSA_NAME);
35516 target = chkp_get_rtl_bounds (arg0);
35517 /* If no bounds were specified for the returned value,
35518 then use INIT bounds. This usually happens when
35519 some built-in function is expanded. */
35520 if (!target)
35522 rtx t1 = gen_reg_rtx (Pmode);
35523 rtx t2 = gen_reg_rtx (Pmode);
35524 target = gen_reg_rtx (BNDmode);
35525 emit_move_insn (t1, const0_rtx);
35526 emit_move_insn (t2, constm1_rtx);
35527 emit_insn (TARGET_64BIT
35528 ? gen_bnd64_mk (target, t1, t2)
35529 : gen_bnd32_mk (target, t1, t2));
35531 gcc_assert (target && REG_P (target));
35532 return target;
35534 case IX86_BUILTIN_BNDNARROW:
35536 rtx m1, m1h1, m1h2, lb, ub, t1;
35538 /* Return value and lb. */
35539 arg0 = CALL_EXPR_ARG (exp, 0);
35540 /* Bounds. */
35541 arg1 = CALL_EXPR_ARG (exp, 1);
35542 /* Size. */
35543 arg2 = CALL_EXPR_ARG (exp, 2);
35545 /* The size was passed, but we need to use (size - 1), as for bndmk. */
35546 arg2 = fold_build2 (PLUS_EXPR, TREE_TYPE (arg2), arg2,
35547 integer_minus_one_node);
35549 /* Add LB to the size and invert to get UB. */
35550 arg2 = fold_build2 (PLUS_EXPR, TREE_TYPE (arg2), arg2, arg0);
35551 arg2 = fold_build1 (BIT_NOT_EXPR, TREE_TYPE (arg2), arg2);
35553 op0 = expand_normal (arg0);
35554 op1 = expand_normal (arg1);
35555 op2 = expand_normal (arg2);
35557 lb = force_reg (Pmode, op0);
35558 ub = force_reg (Pmode, op2);
35560 /* We need to move bounds to memory before any computations. */
35561 if (!MEM_P (op1))
35563 m1 = assign_stack_local (BNDmode, GET_MODE_SIZE (BNDmode), 0);
35564 emit_insn (gen_move_insn (m1, op1));
35566 else
35567 m1 = op1;
35569 /* Generate mem expression to be used for access to LB and UB. */
35570 m1h1 = gen_rtx_MEM (Pmode, XEXP (m1, 0));
35571 m1h2 = gen_rtx_MEM (Pmode, plus_constant (Pmode, XEXP (m1, 0),
35572 GET_MODE_SIZE (Pmode)));
35574 t1 = gen_reg_rtx (Pmode);
35576 /* Compute LB. */
35577 emit_move_insn (t1, m1h1);
35578 ix86_emit_move_max (t1, lb);
35579 emit_move_insn (m1h1, t1);
35582 /* Compute UB. UB is stored in one's complement form. Therefore
35583 we also use max here. */
35584 emit_move_insn (t1, m1h2);
35585 ix86_emit_move_max (t1, ub);
35586 emit_move_insn (m1h2, t1);
35588 op2 = gen_reg_rtx (BNDmode);
35589 emit_move_insn (op2, m1);
35591 return chkp_join_splitted_slot (op0, op2);
35594 case IX86_BUILTIN_BNDINT:
35596 unsigned bndsize = GET_MODE_SIZE (BNDmode);
35597 unsigned psize = GET_MODE_SIZE (Pmode);
35598 rtx res = assign_stack_local (BNDmode, bndsize, 0);
35599 rtx m1, m2, m1h1, m1h2, m2h1, m2h2, t1, t2, rh1, rh2;
35601 arg0 = CALL_EXPR_ARG (exp, 0);
35602 arg1 = CALL_EXPR_ARG (exp, 1);
35604 op0 = expand_normal (arg0);
35605 op1 = expand_normal (arg1);
35607 /* We need to move bounds to memory before any computations. */
35608 if (!MEM_P (op0))
35610 m1 = assign_stack_local (BNDmode, bndsize, 0);
35611 emit_insn (gen_move_insn (m1, op0));
35613 else
35614 m1 = op0;
35616 if (!MEM_P (op1))
35618 m2 = assign_stack_local (BNDmode, bndsize, 0);
35619 emit_move_insn (m2, op1);
35621 else
35622 m2 = op1;
35624 /* Generate mem expression to be used for access to LB and UB. */
35625 m1h1 = gen_rtx_MEM (Pmode, XEXP (m1, 0));
35626 m1h2 = gen_rtx_MEM (Pmode, plus_constant (Pmode, XEXP (m1, 0), psize));
35627 m2h1 = gen_rtx_MEM (Pmode, XEXP (m2, 0));
35628 m2h2 = gen_rtx_MEM (Pmode, plus_constant (Pmode, XEXP (m2, 0), psize));
35629 rh1 = gen_rtx_MEM (Pmode, XEXP (res, 0));
35630 rh2 = gen_rtx_MEM (Pmode, plus_constant (Pmode, XEXP (res, 0), psize));
35632 /* Allocate temporaries. */
35633 t1 = gen_reg_rtx (Pmode);
35634 t2 = gen_reg_rtx (Pmode);
35636 /* Compute LB. */
35637 emit_move_insn (t1, m1h1);
35638 emit_move_insn (t2, m2h1);
35639 ix86_emit_move_max (t1, t2);
35640 emit_move_insn (rh1, t1);
35642 /* Compute UB. UB is stored in one's complement form. Therefore
35643 we also use max here. */
35644 emit_move_insn (t1, m1h2);
35645 emit_move_insn (t2, m2h2);
35646 ix86_emit_move_max (t1, t2);
35647 emit_move_insn (rh2, t1);
35649 return res;
35652 case IX86_BUILTIN_SIZEOF:
35654 enum machine_mode mode = Pmode;
35655 rtx t1, t2;
35657 arg0 = CALL_EXPR_ARG (exp, 0);
35658 gcc_assert (TREE_CODE (arg0) == VAR_DECL);
35660 t1 = gen_reg_rtx (mode);
35661 t2 = gen_rtx_SYMBOL_REF (Pmode,
35662 IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (arg0)));
35663 t2 = gen_rtx_UNSPEC (mode, gen_rtvec (1, t2), UNSPEC_SIZEOF);
35665 emit_insn (gen_rtx_SET (VOIDmode, t1, t2));
35667 return t1;
35670 case IX86_BUILTIN_BNDLOWER:
35672 rtx mem, hmem;
35674 arg0 = CALL_EXPR_ARG (exp, 0);
35675 op0 = expand_normal (arg0);
35677 /* We need to move bounds to memory first. */
35678 if (!MEM_P (op0))
35680 mem = assign_stack_local (BNDmode, GET_MODE_SIZE (BNDmode), 0);
35681 emit_insn (gen_move_insn (mem, op0));
35683 else
35684 mem = op0;
35686 /* Generate mem expression to access LB and load it. */
35687 hmem = gen_rtx_MEM (Pmode, XEXP (mem, 0));
35688 target = gen_reg_rtx (Pmode);
35689 emit_move_insn (target, hmem);
35691 return target;
35694 case IX86_BUILTIN_BNDUPPER:
35696 rtx mem, hmem;
35698 arg0 = CALL_EXPR_ARG (exp, 0);
35699 op0 = expand_normal (arg0);
35701 /* We need to move bounds to memory first. */
35702 if (!MEM_P (op0))
35704 mem = assign_stack_local (BNDmode, GET_MODE_SIZE (BNDmode), 0);
35705 emit_insn (gen_move_insn (mem, op0));
35707 else
35708 mem = op0;
35710 /* Generate mem expression to access UB and load it. */
35711 hmem = gen_rtx_MEM (Pmode,
35712 gen_rtx_PLUS (Pmode, XEXP (mem, 0),
35713 GEN_INT (GET_MODE_SIZE (Pmode))));
35714 target = gen_reg_rtx (Pmode);
35715 emit_move_insn (target, hmem);
35717 /* We need to invert all bits of UB. */
35718 emit_insn (gen_rtx_SET (Pmode, target, gen_rtx_NOT (Pmode, target)));
35720 return target;
35723 case IX86_BUILTIN_MASKMOVQ:
35724 case IX86_BUILTIN_MASKMOVDQU:
35725 icode = (fcode == IX86_BUILTIN_MASKMOVQ
35726 ? CODE_FOR_mmx_maskmovq
35727 : CODE_FOR_sse2_maskmovdqu);
35728 /* Note the arg order is different from the operand order. */
35729 arg1 = CALL_EXPR_ARG (exp, 0);
35730 arg2 = CALL_EXPR_ARG (exp, 1);
35731 arg0 = CALL_EXPR_ARG (exp, 2);
35732 op0 = expand_normal (arg0);
35733 op1 = expand_normal (arg1);
35734 op2 = expand_normal (arg2);
35735 mode0 = insn_data[icode].operand[0].mode;
35736 mode1 = insn_data[icode].operand[1].mode;
35737 mode2 = insn_data[icode].operand[2].mode;
35739 op0 = ix86_zero_extend_to_Pmode (op0);
35740 op0 = gen_rtx_MEM (mode1, op0);
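/* op0, built from arg 2 (the destination pointer), becomes the memory
operand that the masked store writes through. */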
35742 if (!insn_data[icode].operand[0].predicate (op0, mode0))
35743 op0 = copy_to_mode_reg (mode0, op0);
35744 if (!insn_data[icode].operand[1].predicate (op1, mode1))
35745 op1 = copy_to_mode_reg (mode1, op1);
35746 if (!insn_data[icode].operand[2].predicate (op2, mode2))
35747 op2 = copy_to_mode_reg (mode2, op2);
35748 pat = GEN_FCN (icode) (op0, op1, op2);
35749 if (! pat)
35750 return 0;
35751 emit_insn (pat);
35752 return 0;
35754 case IX86_BUILTIN_LDMXCSR:
35755 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
35756 target = assign_386_stack_local (SImode, SLOT_TEMP);
35757 emit_move_insn (target, op0);
35758 emit_insn (gen_sse_ldmxcsr (target));
35759 return 0;
35761 case IX86_BUILTIN_STMXCSR:
35762 target = assign_386_stack_local (SImode, SLOT_TEMP);
35763 emit_insn (gen_sse_stmxcsr (target));
35764 return copy_to_mode_reg (SImode, target);
35766 case IX86_BUILTIN_CLFLUSH:
35767 arg0 = CALL_EXPR_ARG (exp, 0);
35768 op0 = expand_normal (arg0);
35769 icode = CODE_FOR_sse2_clflush;
35770 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35771 op0 = ix86_zero_extend_to_Pmode (op0);
35773 emit_insn (gen_sse2_clflush (op0));
35774 return 0;
35776 case IX86_BUILTIN_MONITOR:
35777 arg0 = CALL_EXPR_ARG (exp, 0);
35778 arg1 = CALL_EXPR_ARG (exp, 1);
35779 arg2 = CALL_EXPR_ARG (exp, 2);
35780 op0 = expand_normal (arg0);
35781 op1 = expand_normal (arg1);
35782 op2 = expand_normal (arg2);
35783 if (!REG_P (op0))
35784 op0 = ix86_zero_extend_to_Pmode (op0);
35785 if (!REG_P (op1))
35786 op1 = copy_to_mode_reg (SImode, op1);
35787 if (!REG_P (op2))
35788 op2 = copy_to_mode_reg (SImode, op2);
35789 emit_insn (ix86_gen_monitor (op0, op1, op2));
35790 return 0;
35792 case IX86_BUILTIN_MWAIT:
35793 arg0 = CALL_EXPR_ARG (exp, 0);
35794 arg1 = CALL_EXPR_ARG (exp, 1);
35795 op0 = expand_normal (arg0);
35796 op1 = expand_normal (arg1);
35797 if (!REG_P (op0))
35798 op0 = copy_to_mode_reg (SImode, op0);
35799 if (!REG_P (op1))
35800 op1 = copy_to_mode_reg (SImode, op1);
35801 emit_insn (gen_sse3_mwait (op0, op1));
35802 return 0;
35804 case IX86_BUILTIN_VEC_INIT_V2SI:
35805 case IX86_BUILTIN_VEC_INIT_V4HI:
35806 case IX86_BUILTIN_VEC_INIT_V8QI:
35807 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
35809 case IX86_BUILTIN_VEC_EXT_V2DF:
35810 case IX86_BUILTIN_VEC_EXT_V2DI:
35811 case IX86_BUILTIN_VEC_EXT_V4SF:
35812 case IX86_BUILTIN_VEC_EXT_V4SI:
35813 case IX86_BUILTIN_VEC_EXT_V8HI:
35814 case IX86_BUILTIN_VEC_EXT_V2SI:
35815 case IX86_BUILTIN_VEC_EXT_V4HI:
35816 case IX86_BUILTIN_VEC_EXT_V16QI:
35817 return ix86_expand_vec_ext_builtin (exp, target);
35819 case IX86_BUILTIN_VEC_SET_V2DI:
35820 case IX86_BUILTIN_VEC_SET_V4SF:
35821 case IX86_BUILTIN_VEC_SET_V4SI:
35822 case IX86_BUILTIN_VEC_SET_V8HI:
35823 case IX86_BUILTIN_VEC_SET_V4HI:
35824 case IX86_BUILTIN_VEC_SET_V16QI:
35825 return ix86_expand_vec_set_builtin (exp);
35827 case IX86_BUILTIN_INFQ:
35828 case IX86_BUILTIN_HUGE_VALQ:
35830 REAL_VALUE_TYPE inf;
35831 rtx tmp;
35833 real_inf (&inf);
35834 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
35836 tmp = validize_mem (force_const_mem (mode, tmp));
35838 if (target == 0)
35839 target = gen_reg_rtx (mode);
35841 emit_move_insn (target, tmp);
35842 return target;
35845 case IX86_BUILTIN_RDPMC:
35846 case IX86_BUILTIN_RDTSC:
35847 case IX86_BUILTIN_RDTSCP:
35849 op0 = gen_reg_rtx (DImode);
35850 op1 = gen_reg_rtx (DImode);
35852 if (fcode == IX86_BUILTIN_RDPMC)
35854 arg0 = CALL_EXPR_ARG (exp, 0);
35855 op2 = expand_normal (arg0);
35856 if (!register_operand (op2, SImode))
35857 op2 = copy_to_mode_reg (SImode, op2);
35859 insn = (TARGET_64BIT
35860 ? gen_rdpmc_rex64 (op0, op1, op2)
35861 : gen_rdpmc (op0, op2));
35862 emit_insn (insn);
35864 else if (fcode == IX86_BUILTIN_RDTSC)
35866 insn = (TARGET_64BIT
35867 ? gen_rdtsc_rex64 (op0, op1)
35868 : gen_rdtsc (op0));
35869 emit_insn (insn);
35871 else
35873 op2 = gen_reg_rtx (SImode);
35875 insn = (TARGET_64BIT
35876 ? gen_rdtscp_rex64 (op0, op1, op2)
35877 : gen_rdtscp (op0, op2));
35878 emit_insn (insn);
35880 arg0 = CALL_EXPR_ARG (exp, 0);
35881 op4 = expand_normal (arg0);
35882 if (!address_operand (op4, VOIDmode))
35884 op4 = convert_memory_address (Pmode, op4);
35885 op4 = copy_addr_to_reg (op4);
35887 emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
35890 if (target == 0)
35892 /* mode is VOIDmode if __builtin_rd* has been called
35893 without lhs. */
35894 if (mode == VOIDmode)
35895 return target;
35896 target = gen_reg_rtx (mode);
35899 if (TARGET_64BIT)
35901 op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
35902 op1, 1, OPTAB_DIRECT);
35903 op0 = expand_simple_binop (DImode, IOR, op0, op1,
35904 op0, 1, OPTAB_DIRECT);
35907 emit_move_insn (target, op0);
35908 return target;
35910 case IX86_BUILTIN_FXSAVE:
35911 case IX86_BUILTIN_FXRSTOR:
35912 case IX86_BUILTIN_FXSAVE64:
35913 case IX86_BUILTIN_FXRSTOR64:
35914 case IX86_BUILTIN_FNSTENV:
35915 case IX86_BUILTIN_FLDENV:
35916 case IX86_BUILTIN_FNSTSW:
35917 mode0 = BLKmode;
35918 switch (fcode)
35920 case IX86_BUILTIN_FXSAVE:
35921 icode = CODE_FOR_fxsave;
35922 break;
35923 case IX86_BUILTIN_FXRSTOR:
35924 icode = CODE_FOR_fxrstor;
35925 break;
35926 case IX86_BUILTIN_FXSAVE64:
35927 icode = CODE_FOR_fxsave64;
35928 break;
35929 case IX86_BUILTIN_FXRSTOR64:
35930 icode = CODE_FOR_fxrstor64;
35931 break;
35932 case IX86_BUILTIN_FNSTENV:
35933 icode = CODE_FOR_fnstenv;
35934 break;
35935 case IX86_BUILTIN_FLDENV:
35936 icode = CODE_FOR_fldenv;
35937 break;
35938 case IX86_BUILTIN_FNSTSW:
35939 icode = CODE_FOR_fnstsw;
35940 mode0 = HImode;
35941 break;
35942 default:
35943 gcc_unreachable ();
35946 arg0 = CALL_EXPR_ARG (exp, 0);
35947 op0 = expand_normal (arg0);
35949 if (!address_operand (op0, VOIDmode))
35951 op0 = convert_memory_address (Pmode, op0);
35952 op0 = copy_addr_to_reg (op0);
35954 op0 = gen_rtx_MEM (mode0, op0);
35956 pat = GEN_FCN (icode) (op0);
35957 if (pat)
35958 emit_insn (pat);
35959 return 0;
35961 case IX86_BUILTIN_XSAVE:
35962 case IX86_BUILTIN_XRSTOR:
35963 case IX86_BUILTIN_XSAVE64:
35964 case IX86_BUILTIN_XRSTOR64:
35965 case IX86_BUILTIN_XSAVEOPT:
35966 case IX86_BUILTIN_XSAVEOPT64:
35967 arg0 = CALL_EXPR_ARG (exp, 0);
35968 arg1 = CALL_EXPR_ARG (exp, 1);
35969 op0 = expand_normal (arg0);
35970 op1 = expand_normal (arg1);
35972 if (!address_operand (op0, VOIDmode))
35974 op0 = convert_memory_address (Pmode, op0);
35975 op0 = copy_addr_to_reg (op0);
35977 op0 = gen_rtx_MEM (BLKmode, op0);
35979 op1 = force_reg (DImode, op1);
35981 if (TARGET_64BIT)
35983 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
35984 NULL, 1, OPTAB_DIRECT);
35985 switch (fcode)
35987 case IX86_BUILTIN_XSAVE:
35988 icode = CODE_FOR_xsave_rex64;
35989 break;
35990 case IX86_BUILTIN_XRSTOR:
35991 icode = CODE_FOR_xrstor_rex64;
35992 break;
35993 case IX86_BUILTIN_XSAVE64:
35994 icode = CODE_FOR_xsave64;
35995 break;
35996 case IX86_BUILTIN_XRSTOR64:
35997 icode = CODE_FOR_xrstor64;
35998 break;
35999 case IX86_BUILTIN_XSAVEOPT:
36000 icode = CODE_FOR_xsaveopt_rex64;
36001 break;
36002 case IX86_BUILTIN_XSAVEOPT64:
36003 icode = CODE_FOR_xsaveopt64;
36004 break;
36005 default:
36006 gcc_unreachable ();
36009 op2 = gen_lowpart (SImode, op2);
36010 op1 = gen_lowpart (SImode, op1);
36011 pat = GEN_FCN (icode) (op0, op1, op2);
36013 else
36015 switch (fcode)
36017 case IX86_BUILTIN_XSAVE:
36018 icode = CODE_FOR_xsave;
36019 break;
36020 case IX86_BUILTIN_XRSTOR:
36021 icode = CODE_FOR_xrstor;
36022 break;
36023 case IX86_BUILTIN_XSAVEOPT:
36024 icode = CODE_FOR_xsaveopt;
36025 break;
36026 default:
36027 gcc_unreachable ();
36029 pat = GEN_FCN (icode) (op0, op1);
36032 if (pat)
36033 emit_insn (pat);
36034 return 0;
36036 case IX86_BUILTIN_LLWPCB:
36037 arg0 = CALL_EXPR_ARG (exp, 0);
36038 op0 = expand_normal (arg0);
36039 icode = CODE_FOR_lwp_llwpcb;
36040 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
36041 op0 = ix86_zero_extend_to_Pmode (op0);
36042 emit_insn (gen_lwp_llwpcb (op0));
36043 return 0;
36045 case IX86_BUILTIN_SLWPCB:
36046 icode = CODE_FOR_lwp_slwpcb;
36047 if (!target
36048 || !insn_data[icode].operand[0].predicate (target, Pmode))
36049 target = gen_reg_rtx (Pmode);
36050 emit_insn (gen_lwp_slwpcb (target));
36051 return target;
36053 case IX86_BUILTIN_BEXTRI32:
36054 case IX86_BUILTIN_BEXTRI64:
36055 arg0 = CALL_EXPR_ARG (exp, 0);
36056 arg1 = CALL_EXPR_ARG (exp, 1);
36057 op0 = expand_normal (arg0);
36058 op1 = expand_normal (arg1);
36059 icode = (fcode == IX86_BUILTIN_BEXTRI32
36060 ? CODE_FOR_tbm_bextri_si
36061 : CODE_FOR_tbm_bextri_di);
36062 if (!CONST_INT_P (op1))
36064 error ("last argument must be an immediate");
36065 return const0_rtx;
36067 else
36069 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
36070 unsigned char lsb_index = INTVAL (op1) & 0xFF;
36071 op1 = GEN_INT (length);
36072 op2 = GEN_INT (lsb_index);
36073 pat = GEN_FCN (icode) (target, op0, op1, op2);
36074 if (pat)
36075 emit_insn (pat);
36076 return target;
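   /* Illustrative sketch: the TBM BEXTRI control word packs the field
      length in bits 15:8 and the starting bit in bits 7:0, which is what
      the decode above implements.  Assuming the __builtin_ia32_bextri_u32
      interface, __builtin_ia32_bextri_u32 (x, 0x0804) extracts 8 bits
      starting at bit 4, i.e. (x >> 4) & 0xff.  */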
36079 case IX86_BUILTIN_RDRAND16_STEP:
36080 icode = CODE_FOR_rdrandhi_1;
36081 mode0 = HImode;
36082 goto rdrand_step;
36084 case IX86_BUILTIN_RDRAND32_STEP:
36085 icode = CODE_FOR_rdrandsi_1;
36086 mode0 = SImode;
36087 goto rdrand_step;
36089 case IX86_BUILTIN_RDRAND64_STEP:
36090 icode = CODE_FOR_rdranddi_1;
36091 mode0 = DImode;
36093 rdrand_step:
36094 op0 = gen_reg_rtx (mode0);
36095 emit_insn (GEN_FCN (icode) (op0));
36097 arg0 = CALL_EXPR_ARG (exp, 0);
36098 op1 = expand_normal (arg0);
36099 if (!address_operand (op1, VOIDmode))
36101 op1 = convert_memory_address (Pmode, op1);
36102 op1 = copy_addr_to_reg (op1);
36104 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
36106 op1 = gen_reg_rtx (SImode);
36107 emit_move_insn (op1, CONST1_RTX (SImode));
36109 /* Emit SImode conditional move. */
36110 if (mode0 == HImode)
36112 op2 = gen_reg_rtx (SImode);
36113 emit_insn (gen_zero_extendhisi2 (op2, op0));
36115 else if (mode0 == SImode)
36116 op2 = op0;
36117 else
36118 op2 = gen_rtx_SUBREG (SImode, op0, 0);
36120 if (target == 0)
36121 target = gen_reg_rtx (SImode);
36123 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
36124 const0_rtx);
36125 emit_insn (gen_rtx_SET (VOIDmode, target,
36126 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
36127 return target;
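   /* Illustrative sketch: the expansion above implements an
      _rdrand32_step-style interface, e.g.

        unsigned int val;
        int ok = _rdrand32_step (&val);

      The random value is stored through the pointer argument and the
      builtin returns 1 when the hardware reported success via the carry
      flag, 0 otherwise (hence the conditional move on CCCmode above).  */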
36129 case IX86_BUILTIN_RDSEED16_STEP:
36130 icode = CODE_FOR_rdseedhi_1;
36131 mode0 = HImode;
36132 goto rdseed_step;
36134 case IX86_BUILTIN_RDSEED32_STEP:
36135 icode = CODE_FOR_rdseedsi_1;
36136 mode0 = SImode;
36137 goto rdseed_step;
36139 case IX86_BUILTIN_RDSEED64_STEP:
36140 icode = CODE_FOR_rdseeddi_1;
36141 mode0 = DImode;
36143 rdseed_step:
36144 op0 = gen_reg_rtx (mode0);
36145 emit_insn (GEN_FCN (icode) (op0));
36147 arg0 = CALL_EXPR_ARG (exp, 0);
36148 op1 = expand_normal (arg0);
36149 if (!address_operand (op1, VOIDmode))
36151 op1 = convert_memory_address (Pmode, op1);
36152 op1 = copy_addr_to_reg (op1);
36154 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
36156 op2 = gen_reg_rtx (QImode);
36158 pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
36159 const0_rtx);
36160 emit_insn (gen_rtx_SET (VOIDmode, op2, pat));
36162 if (target == 0)
36163 target = gen_reg_rtx (SImode);
36165 emit_insn (gen_zero_extendqisi2 (target, op2));
36166 return target;
36168 case IX86_BUILTIN_ADDCARRYX32:
36169 icode = TARGET_ADX ? CODE_FOR_adcxsi3 : CODE_FOR_addsi3_carry;
36170 mode0 = SImode;
36171 goto addcarryx;
36173 case IX86_BUILTIN_ADDCARRYX64:
36174 icode = TARGET_ADX ? CODE_FOR_adcxdi3 : CODE_FOR_adddi3_carry;
36175 mode0 = DImode;
36177 addcarryx:
36178 arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */
36179 arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */
36180 arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */
36181 arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */
36183 op0 = gen_reg_rtx (QImode);
36185 /* Generate CF from input operand. */
36186 op1 = expand_normal (arg0);
36187 op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
36188 emit_insn (gen_addqi3_cc (op0, op1, constm1_rtx));
36190 /* Generate ADCX instruction to compute X+Y+CF. */
36191 op2 = expand_normal (arg1);
36192 op3 = expand_normal (arg2);
36194 if (!REG_P (op2))
36195 op2 = copy_to_mode_reg (mode0, op2);
36196 if (!REG_P (op3))
36197 op3 = copy_to_mode_reg (mode0, op3);
36199 op0 = gen_reg_rtx (mode0);
36201 op4 = gen_rtx_REG (CCCmode, FLAGS_REG);
36202 pat = gen_rtx_LTU (VOIDmode, op4, const0_rtx);
36203 emit_insn (GEN_FCN (icode) (op0, op2, op3, op4, pat));
36205 /* Store the result. */
36206 op4 = expand_normal (arg3);
36207 if (!address_operand (op4, VOIDmode))
36209 op4 = convert_memory_address (Pmode, op4);
36210 op4 = copy_addr_to_reg (op4);
36212 emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
36214 /* Return current CF value. */
36215 if (target == 0)
36216 target = gen_reg_rtx (QImode);
36218 PUT_MODE (pat, QImode);
36219 emit_insn (gen_rtx_SET (VOIDmode, target, pat));
36220 return target;
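   /* Illustrative sketch: the sequence above implements an
      _addcarryx_u32-style interface, e.g.

        unsigned int sum;
        unsigned char c_out = _addcarryx_u32 (c_in, a, b, &sum);

      The incoming carry is materialized in the flags register, a + b + CF
      is stored through the pointer argument, and the outgoing carry is
      returned as an unsigned char.  */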
36222 case IX86_BUILTIN_READ_FLAGS:
36223 emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG)));
36225 if (optimize
36226 || target == NULL_RTX
36227 || !nonimmediate_operand (target, word_mode)
36228 || GET_MODE (target) != word_mode)
36229 target = gen_reg_rtx (word_mode);
36231 emit_insn (gen_pop (target));
36232 return target;
36234 case IX86_BUILTIN_WRITE_FLAGS:
36236 arg0 = CALL_EXPR_ARG (exp, 0);
36237 op0 = expand_normal (arg0);
36238 if (!general_no_elim_operand (op0, word_mode))
36239 op0 = copy_to_mode_reg (word_mode, op0);
36241 emit_insn (gen_push (op0));
36242 emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG)));
36243 return 0;
36245 case IX86_BUILTIN_KORTESTC16:
36246 icode = CODE_FOR_kortestchi;
36247 mode0 = HImode;
36248 mode1 = CCCmode;
36249 goto kortest;
36251 case IX86_BUILTIN_KORTESTZ16:
36252 icode = CODE_FOR_kortestzhi;
36253 mode0 = HImode;
36254 mode1 = CCZmode;
36256 kortest:
36257 arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */
36258 arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. */
36259 op0 = expand_normal (arg0);
36260 op1 = expand_normal (arg1);
36262 op0 = copy_to_reg (op0);
36263 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
36264 op1 = copy_to_reg (op1);
36265 op1 = simplify_gen_subreg (mode0, op1, GET_MODE (op1), 0);
36267 target = gen_reg_rtx (QImode);
36268 emit_insn (gen_rtx_SET (mode0, target, const0_rtx));
36270 /* Emit kortest. */
36271 emit_insn (GEN_FCN (icode) (op0, op1));
36272 /* And use setcc to return result from flags. */
36273 ix86_expand_setcc (target, EQ,
36274 gen_rtx_REG (mode1, FLAGS_REG), const0_rtx);
36275 return target;
36277 case IX86_BUILTIN_GATHERSIV2DF:
36278 icode = CODE_FOR_avx2_gathersiv2df;
36279 goto gather_gen;
36280 case IX86_BUILTIN_GATHERSIV4DF:
36281 icode = CODE_FOR_avx2_gathersiv4df;
36282 goto gather_gen;
36283 case IX86_BUILTIN_GATHERDIV2DF:
36284 icode = CODE_FOR_avx2_gatherdiv2df;
36285 goto gather_gen;
36286 case IX86_BUILTIN_GATHERDIV4DF:
36287 icode = CODE_FOR_avx2_gatherdiv4df;
36288 goto gather_gen;
36289 case IX86_BUILTIN_GATHERSIV4SF:
36290 icode = CODE_FOR_avx2_gathersiv4sf;
36291 goto gather_gen;
36292 case IX86_BUILTIN_GATHERSIV8SF:
36293 icode = CODE_FOR_avx2_gathersiv8sf;
36294 goto gather_gen;
36295 case IX86_BUILTIN_GATHERDIV4SF:
36296 icode = CODE_FOR_avx2_gatherdiv4sf;
36297 goto gather_gen;
36298 case IX86_BUILTIN_GATHERDIV8SF:
36299 icode = CODE_FOR_avx2_gatherdiv8sf;
36300 goto gather_gen;
36301 case IX86_BUILTIN_GATHERSIV2DI:
36302 icode = CODE_FOR_avx2_gathersiv2di;
36303 goto gather_gen;
36304 case IX86_BUILTIN_GATHERSIV4DI:
36305 icode = CODE_FOR_avx2_gathersiv4di;
36306 goto gather_gen;
36307 case IX86_BUILTIN_GATHERDIV2DI:
36308 icode = CODE_FOR_avx2_gatherdiv2di;
36309 goto gather_gen;
36310 case IX86_BUILTIN_GATHERDIV4DI:
36311 icode = CODE_FOR_avx2_gatherdiv4di;
36312 goto gather_gen;
36313 case IX86_BUILTIN_GATHERSIV4SI:
36314 icode = CODE_FOR_avx2_gathersiv4si;
36315 goto gather_gen;
36316 case IX86_BUILTIN_GATHERSIV8SI:
36317 icode = CODE_FOR_avx2_gathersiv8si;
36318 goto gather_gen;
36319 case IX86_BUILTIN_GATHERDIV4SI:
36320 icode = CODE_FOR_avx2_gatherdiv4si;
36321 goto gather_gen;
36322 case IX86_BUILTIN_GATHERDIV8SI:
36323 icode = CODE_FOR_avx2_gatherdiv8si;
36324 goto gather_gen;
36325 case IX86_BUILTIN_GATHERALTSIV4DF:
36326 icode = CODE_FOR_avx2_gathersiv4df;
36327 goto gather_gen;
36328 case IX86_BUILTIN_GATHERALTDIV8SF:
36329 icode = CODE_FOR_avx2_gatherdiv8sf;
36330 goto gather_gen;
36331 case IX86_BUILTIN_GATHERALTSIV4DI:
36332 icode = CODE_FOR_avx2_gathersiv4di;
36333 goto gather_gen;
36334 case IX86_BUILTIN_GATHERALTDIV8SI:
36335 icode = CODE_FOR_avx2_gatherdiv8si;
36336 goto gather_gen;
36337 case IX86_BUILTIN_GATHER3SIV16SF:
36338 icode = CODE_FOR_avx512f_gathersiv16sf;
36339 goto gather_gen;
36340 case IX86_BUILTIN_GATHER3SIV8DF:
36341 icode = CODE_FOR_avx512f_gathersiv8df;
36342 goto gather_gen;
36343 case IX86_BUILTIN_GATHER3DIV16SF:
36344 icode = CODE_FOR_avx512f_gatherdiv16sf;
36345 goto gather_gen;
36346 case IX86_BUILTIN_GATHER3DIV8DF:
36347 icode = CODE_FOR_avx512f_gatherdiv8df;
36348 goto gather_gen;
36349 case IX86_BUILTIN_GATHER3SIV16SI:
36350 icode = CODE_FOR_avx512f_gathersiv16si;
36351 goto gather_gen;
36352 case IX86_BUILTIN_GATHER3SIV8DI:
36353 icode = CODE_FOR_avx512f_gathersiv8di;
36354 goto gather_gen;
36355 case IX86_BUILTIN_GATHER3DIV16SI:
36356 icode = CODE_FOR_avx512f_gatherdiv16si;
36357 goto gather_gen;
36358 case IX86_BUILTIN_GATHER3DIV8DI:
36359 icode = CODE_FOR_avx512f_gatherdiv8di;
36360 goto gather_gen;
36361 case IX86_BUILTIN_GATHER3ALTSIV8DF:
36362 icode = CODE_FOR_avx512f_gathersiv8df;
36363 goto gather_gen;
36364 case IX86_BUILTIN_GATHER3ALTDIV16SF:
36365 icode = CODE_FOR_avx512f_gatherdiv16sf;
36366 goto gather_gen;
36367 case IX86_BUILTIN_GATHER3ALTSIV8DI:
36368 icode = CODE_FOR_avx512f_gathersiv8di;
36369 goto gather_gen;
36370 case IX86_BUILTIN_GATHER3ALTDIV16SI:
36371 icode = CODE_FOR_avx512f_gatherdiv16si;
36372 goto gather_gen;
36373 case IX86_BUILTIN_SCATTERSIV16SF:
36374 icode = CODE_FOR_avx512f_scattersiv16sf;
36375 goto scatter_gen;
36376 case IX86_BUILTIN_SCATTERSIV8DF:
36377 icode = CODE_FOR_avx512f_scattersiv8df;
36378 goto scatter_gen;
36379 case IX86_BUILTIN_SCATTERDIV16SF:
36380 icode = CODE_FOR_avx512f_scatterdiv16sf;
36381 goto scatter_gen;
36382 case IX86_BUILTIN_SCATTERDIV8DF:
36383 icode = CODE_FOR_avx512f_scatterdiv8df;
36384 goto scatter_gen;
36385 case IX86_BUILTIN_SCATTERSIV16SI:
36386 icode = CODE_FOR_avx512f_scattersiv16si;
36387 goto scatter_gen;
36388 case IX86_BUILTIN_SCATTERSIV8DI:
36389 icode = CODE_FOR_avx512f_scattersiv8di;
36390 goto scatter_gen;
36391 case IX86_BUILTIN_SCATTERDIV16SI:
36392 icode = CODE_FOR_avx512f_scatterdiv16si;
36393 goto scatter_gen;
36394 case IX86_BUILTIN_SCATTERDIV8DI:
36395 icode = CODE_FOR_avx512f_scatterdiv8di;
36396 goto scatter_gen;
36398 case IX86_BUILTIN_GATHERPFDPD:
36399 icode = CODE_FOR_avx512pf_gatherpfv8sidf;
36400 goto vec_prefetch_gen;
36401 case IX86_BUILTIN_GATHERPFDPS:
36402 icode = CODE_FOR_avx512pf_gatherpfv16sisf;
36403 goto vec_prefetch_gen;
36404 case IX86_BUILTIN_GATHERPFQPD:
36405 icode = CODE_FOR_avx512pf_gatherpfv8didf;
36406 goto vec_prefetch_gen;
36407 case IX86_BUILTIN_GATHERPFQPS:
36408 icode = CODE_FOR_avx512pf_gatherpfv8disf;
36409 goto vec_prefetch_gen;
36410 case IX86_BUILTIN_SCATTERPFDPD:
36411 icode = CODE_FOR_avx512pf_scatterpfv8sidf;
36412 goto vec_prefetch_gen;
36413 case IX86_BUILTIN_SCATTERPFDPS:
36414 icode = CODE_FOR_avx512pf_scatterpfv16sisf;
36415 goto vec_prefetch_gen;
36416 case IX86_BUILTIN_SCATTERPFQPD:
36417 icode = CODE_FOR_avx512pf_scatterpfv8didf;
36418 goto vec_prefetch_gen;
36419 case IX86_BUILTIN_SCATTERPFQPS:
36420 icode = CODE_FOR_avx512pf_scatterpfv8disf;
36421 goto vec_prefetch_gen;
36423 gather_gen:
36424 rtx half;
36425 rtx (*gen) (rtx, rtx);
36427 arg0 = CALL_EXPR_ARG (exp, 0);
36428 arg1 = CALL_EXPR_ARG (exp, 1);
36429 arg2 = CALL_EXPR_ARG (exp, 2);
36430 arg3 = CALL_EXPR_ARG (exp, 3);
36431 arg4 = CALL_EXPR_ARG (exp, 4);
36432 op0 = expand_normal (arg0);
36433 op1 = expand_normal (arg1);
36434 op2 = expand_normal (arg2);
36435 op3 = expand_normal (arg3);
36436 op4 = expand_normal (arg4);
36437 /* Note the arg order is different from the operand order. */
36438 mode0 = insn_data[icode].operand[1].mode;
36439 mode2 = insn_data[icode].operand[3].mode;
36440 mode3 = insn_data[icode].operand[4].mode;
36441 mode4 = insn_data[icode].operand[5].mode;
36443 if (target == NULL_RTX
36444 || GET_MODE (target) != insn_data[icode].operand[0].mode
36445 || !insn_data[icode].operand[0].predicate (target,
36446 GET_MODE (target)))
36447 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
36448 else
36449 subtarget = target;
36451 switch (fcode)
36453 case IX86_BUILTIN_GATHER3ALTSIV8DF:
36454 case IX86_BUILTIN_GATHER3ALTSIV8DI:
36455 half = gen_reg_rtx (V8SImode);
36456 if (!nonimmediate_operand (op2, V16SImode))
36457 op2 = copy_to_mode_reg (V16SImode, op2);
36458 emit_insn (gen_vec_extract_lo_v16si (half, op2));
36459 op2 = half;
36460 break;
36461 case IX86_BUILTIN_GATHERALTSIV4DF:
36462 case IX86_BUILTIN_GATHERALTSIV4DI:
36463 half = gen_reg_rtx (V4SImode);
36464 if (!nonimmediate_operand (op2, V8SImode))
36465 op2 = copy_to_mode_reg (V8SImode, op2);
36466 emit_insn (gen_vec_extract_lo_v8si (half, op2));
36467 op2 = half;
36468 break;
36469 case IX86_BUILTIN_GATHER3ALTDIV16SF:
36470 case IX86_BUILTIN_GATHER3ALTDIV16SI:
36471 half = gen_reg_rtx (mode0);
36472 if (mode0 == V8SFmode)
36473 gen = gen_vec_extract_lo_v16sf;
36474 else
36475 gen = gen_vec_extract_lo_v16si;
36476 if (!nonimmediate_operand (op0, GET_MODE (op0)))
36477 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
36478 emit_insn (gen (half, op0));
36479 op0 = half;
36480 if (GET_MODE (op3) != VOIDmode)
36482 if (!nonimmediate_operand (op3, GET_MODE (op3)))
36483 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
36484 emit_insn (gen (half, op3));
36485 op3 = half;
36487 break;
36488 case IX86_BUILTIN_GATHERALTDIV8SF:
36489 case IX86_BUILTIN_GATHERALTDIV8SI:
36490 half = gen_reg_rtx (mode0);
36491 if (mode0 == V4SFmode)
36492 gen = gen_vec_extract_lo_v8sf;
36493 else
36494 gen = gen_vec_extract_lo_v8si;
36495 if (!nonimmediate_operand (op0, GET_MODE (op0)))
36496 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
36497 emit_insn (gen (half, op0));
36498 op0 = half;
36499 if (GET_MODE (op3) != VOIDmode)
36501 if (!nonimmediate_operand (op3, GET_MODE (op3)))
36502 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
36503 emit_insn (gen (half, op3));
36504 op3 = half;
36506 break;
36507 default:
36508 break;
36511 /* Force the memory address into a base register here. But we
36512 don't want to do that to the memory operand for other builtin
36513 functions. */
36514 op1 = ix86_zero_extend_to_Pmode (op1);
36516 if (!insn_data[icode].operand[1].predicate (op0, mode0))
36517 op0 = copy_to_mode_reg (mode0, op0);
36518 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
36519 op1 = copy_to_mode_reg (Pmode, op1);
36520 if (!insn_data[icode].operand[3].predicate (op2, mode2))
36521 op2 = copy_to_mode_reg (mode2, op2);
36522 if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode)
36524 if (!insn_data[icode].operand[4].predicate (op3, mode3))
36525 op3 = copy_to_mode_reg (mode3, op3);
36527 else
36529 op3 = copy_to_reg (op3);
36530 op3 = simplify_gen_subreg (mode3, op3, GET_MODE (op3), 0);
36532 if (!insn_data[icode].operand[5].predicate (op4, mode4))
36534 error ("the last argument must be scale 1, 2, 4, 8");
36535 return const0_rtx;
36538 /* Optimize. If mask is known to have all high bits set,
36539 replace op0 with pc_rtx to signal that the instruction
36540 overwrites the whole destination and doesn't use its
36541 previous contents. */
36542 if (optimize)
36544 if (TREE_CODE (arg3) == INTEGER_CST)
36546 if (integer_all_onesp (arg3))
36547 op0 = pc_rtx;
36549 else if (TREE_CODE (arg3) == VECTOR_CST)
36551 unsigned int negative = 0;
36552 for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
36554 tree cst = VECTOR_CST_ELT (arg3, i);
36555 if (TREE_CODE (cst) == INTEGER_CST
36556 && tree_int_cst_sign_bit (cst))
36557 negative++;
36558 else if (TREE_CODE (cst) == REAL_CST
36559 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
36560 negative++;
36562 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
36563 op0 = pc_rtx;
36565 else if (TREE_CODE (arg3) == SSA_NAME
36566 && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE)
36568 /* Recognize also when mask is like:
36569 __v2df src = _mm_setzero_pd ();
36570 __v2df mask = _mm_cmpeq_pd (src, src);
36572 __v8sf src = _mm256_setzero_ps ();
36573 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
36574 as that is a cheaper way to load all ones into
36575 a register than having to load a constant from
36576 memory. */
36577 gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
36578 if (is_gimple_call (def_stmt))
36580 tree fndecl = gimple_call_fndecl (def_stmt);
36581 if (fndecl
36582 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
36583 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
36585 case IX86_BUILTIN_CMPPD:
36586 case IX86_BUILTIN_CMPPS:
36587 case IX86_BUILTIN_CMPPD256:
36588 case IX86_BUILTIN_CMPPS256:
36589 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
36590 break;
36591 /* FALLTHRU */
36592 case IX86_BUILTIN_CMPEQPD:
36593 case IX86_BUILTIN_CMPEQPS:
36594 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
36595 && initializer_zerop (gimple_call_arg (def_stmt,
36596 1)))
36597 op0 = pc_rtx;
36598 break;
36599 default:
36600 break;
36606 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
36607 if (! pat)
36608 return const0_rtx;
36609 emit_insn (pat);
36611 switch (fcode)
36613 case IX86_BUILTIN_GATHER3DIV16SF:
36614 if (target == NULL_RTX)
36615 target = gen_reg_rtx (V8SFmode);
36616 emit_insn (gen_vec_extract_lo_v16sf (target, subtarget));
36617 break;
36618 case IX86_BUILTIN_GATHER3DIV16SI:
36619 if (target == NULL_RTX)
36620 target = gen_reg_rtx (V8SImode);
36621 emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
36622 break;
36623 case IX86_BUILTIN_GATHERDIV8SF:
36624 if (target == NULL_RTX)
36625 target = gen_reg_rtx (V4SFmode);
36626 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
36627 break;
36628 case IX86_BUILTIN_GATHERDIV8SI:
36629 if (target == NULL_RTX)
36630 target = gen_reg_rtx (V4SImode);
36631 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
36632 break;
36633 default:
36634 target = subtarget;
36635 break;
36637 return target;
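   /* Illustrative sketch: element-wise, the gather expansions above behave
      roughly like

        for (i = 0; i < n; i++)
          dst[i] = element_selected (mask, i)
                   ? *(elem_type *) (base + index[i] * scale)
                   : src[i];

      where BASE is a byte address and element_selected stands for testing
      the sign bit of the mask element (AVX2 forms) or the mask bit
      (AVX-512 forms).  A mask known to be all ones therefore lets the code
      above pass pc_rtx as the merge operand, since the previous destination
      contents can never be observed.  */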
36639 scatter_gen:
36640 arg0 = CALL_EXPR_ARG (exp, 0);
36641 arg1 = CALL_EXPR_ARG (exp, 1);
36642 arg2 = CALL_EXPR_ARG (exp, 2);
36643 arg3 = CALL_EXPR_ARG (exp, 3);
36644 arg4 = CALL_EXPR_ARG (exp, 4);
36645 op0 = expand_normal (arg0);
36646 op1 = expand_normal (arg1);
36647 op2 = expand_normal (arg2);
36648 op3 = expand_normal (arg3);
36649 op4 = expand_normal (arg4);
36650 mode1 = insn_data[icode].operand[1].mode;
36651 mode2 = insn_data[icode].operand[2].mode;
36652 mode3 = insn_data[icode].operand[3].mode;
36653 mode4 = insn_data[icode].operand[4].mode;
36655 /* Force the memory address into a base register here. But we
36656 don't want to do that to the memory operand for other builtin
36657 functions. */
36658 op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
36660 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
36661 op0 = copy_to_mode_reg (Pmode, op0);
36663 if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
36665 if (!insn_data[icode].operand[1].predicate (op1, mode1))
36666 op1 = copy_to_mode_reg (mode1, op1);
36668 else
36670 op1 = copy_to_reg (op1);
36671 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
36674 if (!insn_data[icode].operand[2].predicate (op2, mode2))
36675 op2 = copy_to_mode_reg (mode2, op2);
36677 if (!insn_data[icode].operand[3].predicate (op3, mode3))
36678 op3 = copy_to_mode_reg (mode3, op3);
36680 if (!insn_data[icode].operand[4].predicate (op4, mode4))
36682 error ("the last argument must be scale 1, 2, 4, 8");
36683 return const0_rtx;
36686 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
36687 if (! pat)
36688 return const0_rtx;
36690 emit_insn (pat);
36691 return 0;
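   /* Illustrative sketch: the scatter expansions above are the store-side
      counterpart, roughly

        for (i = 0; i < n; i++)
          if (element_selected (mask, i))
            *(elem_type *) (base + index[i] * scale) = src[i];

      again with BASE taken as a byte address and an explicit AVX-512
      write mask.  */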
36693 vec_prefetch_gen:
36694 arg0 = CALL_EXPR_ARG (exp, 0);
36695 arg1 = CALL_EXPR_ARG (exp, 1);
36696 arg2 = CALL_EXPR_ARG (exp, 2);
36697 arg3 = CALL_EXPR_ARG (exp, 3);
36698 arg4 = CALL_EXPR_ARG (exp, 4);
36699 op0 = expand_normal (arg0);
36700 op1 = expand_normal (arg1);
36701 op2 = expand_normal (arg2);
36702 op3 = expand_normal (arg3);
36703 op4 = expand_normal (arg4);
36704 mode0 = insn_data[icode].operand[0].mode;
36705 mode1 = insn_data[icode].operand[1].mode;
36706 mode3 = insn_data[icode].operand[3].mode;
36707 mode4 = insn_data[icode].operand[4].mode;
36709 if (GET_MODE (op0) == mode0
36710 || (GET_MODE (op0) == VOIDmode && op0 != constm1_rtx))
36712 if (!insn_data[icode].operand[0].predicate (op0, mode0))
36713 op0 = copy_to_mode_reg (mode0, op0);
36715 else if (op0 != constm1_rtx)
36717 op0 = copy_to_reg (op0);
36718 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
36721 if (!insn_data[icode].operand[1].predicate (op1, mode1))
36722 op1 = copy_to_mode_reg (mode1, op1);
36724 /* Force the memory address into a base register here. But we
36725 don't want to do that to the memory operand for other builtin
36726 functions. */
36727 op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1));
36729 if (!insn_data[icode].operand[2].predicate (op2, Pmode))
36730 op2 = copy_to_mode_reg (Pmode, op2);
36732 if (!insn_data[icode].operand[3].predicate (op3, mode3))
36734 error ("the fourth argument must be scale 1, 2, 4, 8");
36735 return const0_rtx;
36738 if (!insn_data[icode].operand[4].predicate (op4, mode4))
36740 error ("incorrect hint operand");
36741 return const0_rtx;
36744 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
36745 if (! pat)
36746 return const0_rtx;
36748 emit_insn (pat);
36750 return 0;
36752 case IX86_BUILTIN_XABORT:
36753 icode = CODE_FOR_xabort;
36754 arg0 = CALL_EXPR_ARG (exp, 0);
36755 op0 = expand_normal (arg0);
36756 mode0 = insn_data[icode].operand[0].mode;
36757 if (!insn_data[icode].operand[0].predicate (op0, mode0))
36759 error ("the xabort's argument must be an 8-bit immediate");
36760 return const0_rtx;
36762 emit_insn (gen_xabort (op0));
36763 return 0;
36765 default:
36766 break;
36769 for (i = 0, d = bdesc_special_args;
36770 i < ARRAY_SIZE (bdesc_special_args);
36771 i++, d++)
36772 if (d->code == fcode)
36773 return ix86_expand_special_args_builtin (d, exp, target);
36775 for (i = 0, d = bdesc_args;
36776 i < ARRAY_SIZE (bdesc_args);
36777 i++, d++)
36778 if (d->code == fcode)
36779 switch (fcode)
36781 case IX86_BUILTIN_FABSQ:
36782 case IX86_BUILTIN_COPYSIGNQ:
36783 if (!TARGET_SSE)
36784 /* Emit a normal call if SSE isn't available. */
36785 return expand_call (exp, target, ignore);
36786 default:
36787 return ix86_expand_args_builtin (d, exp, target);
36790 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
36791 if (d->code == fcode)
36792 return ix86_expand_sse_comi (d, exp, target);
36794 for (i = 0, d = bdesc_round_args; i < ARRAY_SIZE (bdesc_round_args); i++, d++)
36795 if (d->code == fcode)
36796 return ix86_expand_round_builtin (d, exp, target);
36798 for (i = 0, d = bdesc_pcmpestr;
36799 i < ARRAY_SIZE (bdesc_pcmpestr);
36800 i++, d++)
36801 if (d->code == fcode)
36802 return ix86_expand_sse_pcmpestr (d, exp, target);
36804 for (i = 0, d = bdesc_pcmpistr;
36805 i < ARRAY_SIZE (bdesc_pcmpistr);
36806 i++, d++)
36807 if (d->code == fcode)
36808 return ix86_expand_sse_pcmpistr (d, exp, target);
36810 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
36811 if (d->code == fcode)
36812 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
36813 (enum ix86_builtin_func_type)
36814 d->flag, d->comparison);
36816 gcc_unreachable ();
36819 /* This returns the target-specific builtin with code CODE if
36820 current_function_decl has visibility on this builtin, which is checked
36821 using isa flags. Returns NULL_TREE otherwise. */
36823 static tree ix86_get_builtin (enum ix86_builtins code)
36825 struct cl_target_option *opts;
36826 tree target_tree = NULL_TREE;
36828 /* Determine the isa flags of current_function_decl. */
36830 if (current_function_decl)
36831 target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
36833 if (target_tree == NULL)
36834 target_tree = target_option_default_node;
36836 opts = TREE_TARGET_OPTION (target_tree);
36838 if (ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
36839 return ix86_builtin_decl (code, true);
36840 else
36841 return NULL_TREE;
36844 /* Return function decl for target specific builtin
36845 for the given MPX builtin passed in FCODE. */
36846 static tree
36847 ix86_builtin_mpx_function (unsigned fcode)
36849 switch (fcode)
36851 case BUILT_IN_CHKP_BNDMK:
36852 return ix86_builtins[IX86_BUILTIN_BNDMK];
36854 case BUILT_IN_CHKP_BNDSTX:
36855 return ix86_builtins[IX86_BUILTIN_BNDSTX];
36857 case BUILT_IN_CHKP_BNDLDX:
36858 return ix86_builtins[IX86_BUILTIN_BNDLDX];
36860 case BUILT_IN_CHKP_BNDCL:
36861 return ix86_builtins[IX86_BUILTIN_BNDCL];
36863 case BUILT_IN_CHKP_BNDCU:
36864 return ix86_builtins[IX86_BUILTIN_BNDCU];
36866 case BUILT_IN_CHKP_BNDRET:
36867 return ix86_builtins[IX86_BUILTIN_BNDRET];
36869 case BUILT_IN_CHKP_INTERSECT:
36870 return ix86_builtins[IX86_BUILTIN_BNDINT];
36872 case BUILT_IN_CHKP_NARROW:
36873 return ix86_builtins[IX86_BUILTIN_BNDNARROW];
36875 case BUILT_IN_CHKP_SIZEOF:
36876 return ix86_builtins[IX86_BUILTIN_SIZEOF];
36878 case BUILT_IN_CHKP_EXTRACT_LOWER:
36879 return ix86_builtins[IX86_BUILTIN_BNDLOWER];
36881 case BUILT_IN_CHKP_EXTRACT_UPPER:
36882 return ix86_builtins[IX86_BUILTIN_BNDUPPER];
36884 default:
36885 return NULL_TREE;
36888 gcc_unreachable ();
36891 /* Load bounds for pointer value PTR loaded from SLOT.
36892 If SLOT is a register then load bounds associated
36893 with the special address identified by BND.
36895 Return the loaded bounds. */
36896 static rtx
36897 ix86_load_bounds (rtx slot, rtx ptr, rtx bnd)
36899 rtx addr = NULL;
36900 rtx reg;
36902 if (!ptr)
36904 gcc_assert (MEM_P (slot));
36905 ptr = copy_to_mode_reg (Pmode, slot);
36908 if (!slot || REG_P (slot))
36910 if (slot)
36911 ptr = slot;
36913 gcc_assert (CONST_INT_P (bnd));
36915 /* Here we have the case when more than four pointers are
36916 passed in registers. In this case we are out of bound
36917 registers and have to use bndldx to load bound. RA,
36918 RA - 8, etc. are used for address translation in bndldx. */
36919 addr = plus_constant (Pmode, arg_pointer_rtx, -(INTVAL (bnd) + 1) * 8);
36921 else if (MEM_P (slot))
36923 addr = XEXP (slot, 0);
36924 addr = force_reg (Pmode, addr);
36926 else
36927 gcc_unreachable ();
36929 ptr = force_reg (Pmode, ptr);
36930 /* If ptr was a register originally then it may have a
36931 mode other than Pmode. We need to extend in such a
36932 case because bndldx may work only with Pmode regs. */
36933 if (GET_MODE (ptr) != Pmode)
36935 rtx ext = gen_rtx_ZERO_EXTEND (Pmode, ptr);
36936 ptr = gen_reg_rtx (Pmode);
36937 emit_move_insn (ptr, ext);
36940 reg = gen_reg_rtx (BNDmode);
36941 emit_insn (TARGET_64BIT
36942 ? gen_bnd64_ldx (reg, addr, ptr)
36943 : gen_bnd32_ldx (reg, addr, ptr));
36945 return reg;
36948 /* Store bounds BOUNDS for pointer value PTR stored at the
36949 specified ADDR. If ADDR is a register then TO identifies
36950 which special address to use for the bounds store. */
36951 static void
36952 ix86_store_bounds (rtx ptr, rtx addr, rtx bounds, rtx to)
36954 if (!ptr)
36956 gcc_assert (MEM_P (addr));
36957 ptr = copy_to_mode_reg (Pmode, addr);
36960 if (!addr || REG_P (addr))
36962 gcc_assert (CONST_INT_P (to));
36963 addr = plus_constant (Pmode, stack_pointer_rtx, -(INTVAL (to) + 1) * 8);
36965 else if (MEM_P (addr))
36966 addr = XEXP (addr, 0);
36967 else
36968 gcc_unreachable ();
36970 /* Should we also ignore integer modes of incorrect size? */
36971 ptr = force_reg (Pmode, ptr);
36972 addr = force_reg (Pmode, addr);
36974 /* Avoid registers which cannot be used as an index. */
36975 if (!index_register_operand (ptr, Pmode))
36977 rtx temp = gen_reg_rtx (Pmode);
36978 emit_move_insn (temp, ptr);
36979 ptr = temp;
36982 gcc_assert (POINTER_BOUNDS_MODE_P (GET_MODE (bounds)));
36983 bounds = force_reg (GET_MODE (bounds), bounds);
36985 emit_insn (TARGET_64BIT
36986 ? gen_bnd64_stx (addr, ptr, bounds)
36987 : gen_bnd32_stx (addr, ptr, bounds));
36990 /* Load and return bounds returned by function in SLOT. */
36991 static rtx
36992 ix86_load_returned_bounds (rtx slot)
36994 rtx res;
36996 gcc_assert (REG_P (slot));
36997 res = gen_reg_rtx (BNDmode);
36998 emit_move_insn (res, slot);
37000 return res;
37003 /* Store BOUNDS returned by function into SLOT. */
37004 static void
37005 ix86_store_returned_bounds (rtx slot, rtx bounds)
37007 gcc_assert (REG_P (slot));
37008 emit_move_insn (slot, bounds);
37011 /* Returns a function decl for a vectorized version of the builtin function
37012 with builtin function code FN and the result vector type TYPE, or NULL_TREE
37013 if it is not available. */
37015 static tree
37016 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
37017 tree type_in)
37019 enum machine_mode in_mode, out_mode;
37020 int in_n, out_n;
37021 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
37023 if (TREE_CODE (type_out) != VECTOR_TYPE
37024 || TREE_CODE (type_in) != VECTOR_TYPE
37025 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
37026 return NULL_TREE;
37028 out_mode = TYPE_MODE (TREE_TYPE (type_out));
37029 out_n = TYPE_VECTOR_SUBPARTS (type_out);
37030 in_mode = TYPE_MODE (TREE_TYPE (type_in));
37031 in_n = TYPE_VECTOR_SUBPARTS (type_in);
37033 switch (fn)
37035 case BUILT_IN_SQRT:
37036 if (out_mode == DFmode && in_mode == DFmode)
37038 if (out_n == 2 && in_n == 2)
37039 return ix86_get_builtin (IX86_BUILTIN_SQRTPD);
37040 else if (out_n == 4 && in_n == 4)
37041 return ix86_get_builtin (IX86_BUILTIN_SQRTPD256);
37042 else if (out_n == 8 && in_n == 8)
37043 return ix86_get_builtin (IX86_BUILTIN_SQRTPD512);
37045 break;
37047 case BUILT_IN_EXP2F:
37048 if (out_mode == SFmode && in_mode == SFmode)
37050 if (out_n == 16 && in_n == 16)
37051 return ix86_get_builtin (IX86_BUILTIN_EXP2PS);
37053 break;
37055 case BUILT_IN_SQRTF:
37056 if (out_mode == SFmode && in_mode == SFmode)
37058 if (out_n == 4 && in_n == 4)
37059 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR);
37060 else if (out_n == 8 && in_n == 8)
37061 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR256);
37062 else if (out_n == 16 && in_n == 16)
37063 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR512);
37065 break;
37067 case BUILT_IN_IFLOOR:
37068 case BUILT_IN_LFLOOR:
37069 case BUILT_IN_LLFLOOR:
37070 /* The round insn does not trap on denormals. */
37071 if (flag_trapping_math || !TARGET_ROUND)
37072 break;
37074 if (out_mode == SImode && in_mode == DFmode)
37076 if (out_n == 4 && in_n == 2)
37077 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX);
37078 else if (out_n == 8 && in_n == 4)
37079 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256);
37080 else if (out_n == 16 && in_n == 8)
37081 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512);
37083 break;
37085 case BUILT_IN_IFLOORF:
37086 case BUILT_IN_LFLOORF:
37087 case BUILT_IN_LLFLOORF:
37088 /* The round insn does not trap on denormals. */
37089 if (flag_trapping_math || !TARGET_ROUND)
37090 break;
37092 if (out_mode == SImode && in_mode == SFmode)
37094 if (out_n == 4 && in_n == 4)
37095 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
37096 else if (out_n == 8 && in_n == 8)
37097 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
37099 break;
37101 case BUILT_IN_ICEIL:
37102 case BUILT_IN_LCEIL:
37103 case BUILT_IN_LLCEIL:
37104 /* The round insn does not trap on denormals. */
37105 if (flag_trapping_math || !TARGET_ROUND)
37106 break;
37108 if (out_mode == SImode && in_mode == DFmode)
37110 if (out_n == 4 && in_n == 2)
37111 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX);
37112 else if (out_n == 8 && in_n == 4)
37113 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256);
37114 else if (out_n == 16 && in_n == 8)
37115 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512);
37117 break;
37119 case BUILT_IN_ICEILF:
37120 case BUILT_IN_LCEILF:
37121 case BUILT_IN_LLCEILF:
37122 /* The round insn does not trap on denormals. */
37123 if (flag_trapping_math || !TARGET_ROUND)
37124 break;
37126 if (out_mode == SImode && in_mode == SFmode)
37128 if (out_n == 4 && in_n == 4)
37129 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX);
37130 else if (out_n == 8 && in_n == 8)
37131 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
37133 break;
37135 case BUILT_IN_IRINT:
37136 case BUILT_IN_LRINT:
37137 case BUILT_IN_LLRINT:
37138 if (out_mode == SImode && in_mode == DFmode)
37140 if (out_n == 4 && in_n == 2)
37141 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
37142 else if (out_n == 8 && in_n == 4)
37143 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
37145 break;
37147 case BUILT_IN_IRINTF:
37148 case BUILT_IN_LRINTF:
37149 case BUILT_IN_LLRINTF:
37150 if (out_mode == SImode && in_mode == SFmode)
37152 if (out_n == 4 && in_n == 4)
37153 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ);
37154 else if (out_n == 8 && in_n == 8)
37155 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256);
37157 break;
37159 case BUILT_IN_IROUND:
37160 case BUILT_IN_LROUND:
37161 case BUILT_IN_LLROUND:
37162 /* The round insn does not trap on denormals. */
37163 if (flag_trapping_math || !TARGET_ROUND)
37164 break;
37166 if (out_mode == SImode && in_mode == DFmode)
37168 if (out_n == 4 && in_n == 2)
37169 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX);
37170 else if (out_n == 8 && in_n == 4)
37171 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256);
37172 else if (out_n == 16 && in_n == 8)
37173 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512);
37175 break;
37177 case BUILT_IN_IROUNDF:
37178 case BUILT_IN_LROUNDF:
37179 case BUILT_IN_LLROUNDF:
37180 /* The round insn does not trap on denormals. */
37181 if (flag_trapping_math || !TARGET_ROUND)
37182 break;
37184 if (out_mode == SImode && in_mode == SFmode)
37186 if (out_n == 4 && in_n == 4)
37187 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
37188 else if (out_n == 8 && in_n == 8)
37189 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
37191 break;
37193 case BUILT_IN_COPYSIGN:
37194 if (out_mode == DFmode && in_mode == DFmode)
37196 if (out_n == 2 && in_n == 2)
37197 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD);
37198 else if (out_n == 4 && in_n == 4)
37199 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD256);
37200 else if (out_n == 8 && in_n == 8)
37201 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD512);
37203 break;
37205 case BUILT_IN_COPYSIGNF:
37206 if (out_mode == SFmode && in_mode == SFmode)
37208 if (out_n == 4 && in_n == 4)
37209 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS);
37210 else if (out_n == 8 && in_n == 8)
37211 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS256);
37212 else if (out_n == 16 && in_n == 16)
37213 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS512);
37215 break;
37217 case BUILT_IN_FLOOR:
37218 /* The round insn does not trap on denormals. */
37219 if (flag_trapping_math || !TARGET_ROUND)
37220 break;
37222 if (out_mode == DFmode && in_mode == DFmode)
37224 if (out_n == 2 && in_n == 2)
37225 return ix86_get_builtin (IX86_BUILTIN_FLOORPD);
37226 else if (out_n == 4 && in_n == 4)
37227 return ix86_get_builtin (IX86_BUILTIN_FLOORPD256);
37229 break;
37231 case BUILT_IN_FLOORF:
37232 /* The round insn does not trap on denormals. */
37233 if (flag_trapping_math || !TARGET_ROUND)
37234 break;
37236 if (out_mode == SFmode && in_mode == SFmode)
37238 if (out_n == 4 && in_n == 4)
37239 return ix86_get_builtin (IX86_BUILTIN_FLOORPS);
37240 else if (out_n == 8 && in_n == 8)
37241 return ix86_get_builtin (IX86_BUILTIN_FLOORPS256);
37243 break;
37245 case BUILT_IN_CEIL:
37246 /* The round insn does not trap on denormals. */
37247 if (flag_trapping_math || !TARGET_ROUND)
37248 break;
37250 if (out_mode == DFmode && in_mode == DFmode)
37252 if (out_n == 2 && in_n == 2)
37253 return ix86_get_builtin (IX86_BUILTIN_CEILPD);
37254 else if (out_n == 4 && in_n == 4)
37255 return ix86_get_builtin (IX86_BUILTIN_CEILPD256);
37257 break;
37259 case BUILT_IN_CEILF:
37260 /* The round insn does not trap on denormals. */
37261 if (flag_trapping_math || !TARGET_ROUND)
37262 break;
37264 if (out_mode == SFmode && in_mode == SFmode)
37266 if (out_n == 4 && in_n == 4)
37267 return ix86_get_builtin (IX86_BUILTIN_CEILPS);
37268 else if (out_n == 8 && in_n == 8)
37269 return ix86_get_builtin (IX86_BUILTIN_CEILPS256);
37271 break;
37273 case BUILT_IN_TRUNC:
37274 /* The round insn does not trap on denormals. */
37275 if (flag_trapping_math || !TARGET_ROUND)
37276 break;
37278 if (out_mode == DFmode && in_mode == DFmode)
37280 if (out_n == 2 && in_n == 2)
37281 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD);
37282 else if (out_n == 4 && in_n == 4)
37283 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256);
37285 break;
37287 case BUILT_IN_TRUNCF:
37288 /* The round insn does not trap on denormals. */
37289 if (flag_trapping_math || !TARGET_ROUND)
37290 break;
37292 if (out_mode == SFmode && in_mode == SFmode)
37294 if (out_n == 4 && in_n == 4)
37295 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS);
37296 else if (out_n == 8 && in_n == 8)
37297 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256);
37299 break;
37301 case BUILT_IN_RINT:
37302 /* The round insn does not trap on denormals. */
37303 if (flag_trapping_math || !TARGET_ROUND)
37304 break;
37306 if (out_mode == DFmode && in_mode == DFmode)
37308 if (out_n == 2 && in_n == 2)
37309 return ix86_get_builtin (IX86_BUILTIN_RINTPD);
37310 else if (out_n == 4 && in_n == 4)
37311 return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
37313 break;
37315 case BUILT_IN_RINTF:
37316 /* The round insn does not trap on denormals. */
37317 if (flag_trapping_math || !TARGET_ROUND)
37318 break;
37320 if (out_mode == SFmode && in_mode == SFmode)
37322 if (out_n == 4 && in_n == 4)
37323 return ix86_get_builtin (IX86_BUILTIN_RINTPS);
37324 else if (out_n == 8 && in_n == 8)
37325 return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
37327 break;
37329 case BUILT_IN_ROUND:
37330 /* The round insn does not trap on denormals. */
37331 if (flag_trapping_math || !TARGET_ROUND)
37332 break;
37334 if (out_mode == DFmode && in_mode == DFmode)
37336 if (out_n == 2 && in_n == 2)
37337 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ);
37338 else if (out_n == 4 && in_n == 4)
37339 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ256);
37341 break;
37343 case BUILT_IN_ROUNDF:
37344 /* The round insn does not trap on denormals. */
37345 if (flag_trapping_math || !TARGET_ROUND)
37346 break;
37348 if (out_mode == SFmode && in_mode == SFmode)
37350 if (out_n == 4 && in_n == 4)
37351 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ);
37352 else if (out_n == 8 && in_n == 8)
37353 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ256);
37355 break;
37357 case BUILT_IN_FMA:
37358 if (out_mode == DFmode && in_mode == DFmode)
37360 if (out_n == 2 && in_n == 2)
37361 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD);
37362 if (out_n == 4 && in_n == 4)
37363 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256);
37365 break;
37367 case BUILT_IN_FMAF:
37368 if (out_mode == SFmode && in_mode == SFmode)
37370 if (out_n == 4 && in_n == 4)
37371 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS);
37372 if (out_n == 8 && in_n == 8)
37373 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256);
37375 break;
37377 default:
37378 break;
37381 /* Dispatch to a handler for a vectorization library. */
37382 if (ix86_veclib_handler)
37383 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
37384 type_in);
37386 return NULL_TREE;
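/* Illustrative sketch: as a concrete example of the mapping above,
   vectorizing a loop of __builtin_sqrt over doubles with 4-element vectors
   (out_mode == DFmode, out_n == 4, and likewise for the input) hands back
   the decl for IX86_BUILTIN_SQRTPD256.  */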
37389 /* Handler for an SVML-style interface to
37390 a library with vectorized intrinsics. */
37392 static tree
37393 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
37395 char name[20];
37396 tree fntype, new_fndecl, args;
37397 unsigned arity;
37398 const char *bname;
37399 enum machine_mode el_mode, in_mode;
37400 int n, in_n;
37402 /* The SVML is suitable for unsafe math only. */
37403 if (!flag_unsafe_math_optimizations)
37404 return NULL_TREE;
37406 el_mode = TYPE_MODE (TREE_TYPE (type_out));
37407 n = TYPE_VECTOR_SUBPARTS (type_out);
37408 in_mode = TYPE_MODE (TREE_TYPE (type_in));
37409 in_n = TYPE_VECTOR_SUBPARTS (type_in);
37410 if (el_mode != in_mode
37411 || n != in_n)
37412 return NULL_TREE;
37414 switch (fn)
37416 case BUILT_IN_EXP:
37417 case BUILT_IN_LOG:
37418 case BUILT_IN_LOG10:
37419 case BUILT_IN_POW:
37420 case BUILT_IN_TANH:
37421 case BUILT_IN_TAN:
37422 case BUILT_IN_ATAN:
37423 case BUILT_IN_ATAN2:
37424 case BUILT_IN_ATANH:
37425 case BUILT_IN_CBRT:
37426 case BUILT_IN_SINH:
37427 case BUILT_IN_SIN:
37428 case BUILT_IN_ASINH:
37429 case BUILT_IN_ASIN:
37430 case BUILT_IN_COSH:
37431 case BUILT_IN_COS:
37432 case BUILT_IN_ACOSH:
37433 case BUILT_IN_ACOS:
37434 if (el_mode != DFmode || n != 2)
37435 return NULL_TREE;
37436 break;
37438 case BUILT_IN_EXPF:
37439 case BUILT_IN_LOGF:
37440 case BUILT_IN_LOG10F:
37441 case BUILT_IN_POWF:
37442 case BUILT_IN_TANHF:
37443 case BUILT_IN_TANF:
37444 case BUILT_IN_ATANF:
37445 case BUILT_IN_ATAN2F:
37446 case BUILT_IN_ATANHF:
37447 case BUILT_IN_CBRTF:
37448 case BUILT_IN_SINHF:
37449 case BUILT_IN_SINF:
37450 case BUILT_IN_ASINHF:
37451 case BUILT_IN_ASINF:
37452 case BUILT_IN_COSHF:
37453 case BUILT_IN_COSF:
37454 case BUILT_IN_ACOSHF:
37455 case BUILT_IN_ACOSF:
37456 if (el_mode != SFmode || n != 4)
37457 return NULL_TREE;
37458 break;
37460 default:
37461 return NULL_TREE;
37464 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
37466 if (fn == BUILT_IN_LOGF)
37467 strcpy (name, "vmlsLn4");
37468 else if (fn == BUILT_IN_LOG)
37469 strcpy (name, "vmldLn2");
37470 else if (n == 4)
37472 sprintf (name, "vmls%s", bname+10);
37473 name[strlen (name)-1] = '4';
37475 else
37476 sprintf (name, "vmld%s2", bname+10);
37478 /* Convert to uppercase. */
37479 name[4] &= ~0x20;
37481 arity = 0;
37482 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
37483 args;
37484 args = TREE_CHAIN (args))
37485 arity++;
37487 if (arity == 1)
37488 fntype = build_function_type_list (type_out, type_in, NULL);
37489 else
37490 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
37492 /* Build a function declaration for the vectorized function. */
37493 new_fndecl = build_decl (BUILTINS_LOCATION,
37494 FUNCTION_DECL, get_identifier (name), fntype);
37495 TREE_PUBLIC (new_fndecl) = 1;
37496 DECL_EXTERNAL (new_fndecl) = 1;
37497 DECL_IS_NOVOPS (new_fndecl) = 1;
37498 TREE_READONLY (new_fndecl) = 1;
37500 return new_fndecl;
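/* Illustrative sketch: the mangling above turns the scalar builtin name
   into the SVML entry point, e.g. BUILT_IN_SINF with 4-element SFmode
   vectors becomes "vmlsSin4" and BUILT_IN_SIN with 2-element DFmode
   vectors becomes "vmldSin2"; log is special-cased to "vmlsLn4" and
   "vmldLn2" as coded above.  */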
37503 /* Handler for an ACML-style interface to
37504 a library with vectorized intrinsics. */
37506 static tree
37507 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
37509 char name[20] = "__vr.._";
37510 tree fntype, new_fndecl, args;
37511 unsigned arity;
37512 const char *bname;
37513 enum machine_mode el_mode, in_mode;
37514 int n, in_n;
37516 /* The ACML is 64-bit only and suitable for unsafe math only, as
37517 it does not correctly support parts of IEEE with the required
37518 precision such as denormals. */
37519 if (!TARGET_64BIT
37520 || !flag_unsafe_math_optimizations)
37521 return NULL_TREE;
37523 el_mode = TYPE_MODE (TREE_TYPE (type_out));
37524 n = TYPE_VECTOR_SUBPARTS (type_out);
37525 in_mode = TYPE_MODE (TREE_TYPE (type_in));
37526 in_n = TYPE_VECTOR_SUBPARTS (type_in);
37527 if (el_mode != in_mode
37528 || n != in_n)
37529 return NULL_TREE;
37531 switch (fn)
37533 case BUILT_IN_SIN:
37534 case BUILT_IN_COS:
37535 case BUILT_IN_EXP:
37536 case BUILT_IN_LOG:
37537 case BUILT_IN_LOG2:
37538 case BUILT_IN_LOG10:
37539 name[4] = 'd';
37540 name[5] = '2';
37541 if (el_mode != DFmode
37542 || n != 2)
37543 return NULL_TREE;
37544 break;
37546 case BUILT_IN_SINF:
37547 case BUILT_IN_COSF:
37548 case BUILT_IN_EXPF:
37549 case BUILT_IN_POWF:
37550 case BUILT_IN_LOGF:
37551 case BUILT_IN_LOG2F:
37552 case BUILT_IN_LOG10F:
37553 name[4] = 's';
37554 name[5] = '4';
37555 if (el_mode != SFmode
37556 || n != 4)
37557 return NULL_TREE;
37558 break;
37560 default:
37561 return NULL_TREE;
37564 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
37565 sprintf (name + 7, "%s", bname+10);
37567 arity = 0;
37568 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
37569 args;
37570 args = TREE_CHAIN (args))
37571 arity++;
37573 if (arity == 1)
37574 fntype = build_function_type_list (type_out, type_in, NULL);
37575 else
37576 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
37578 /* Build a function declaration for the vectorized function. */
37579 new_fndecl = build_decl (BUILTINS_LOCATION,
37580 FUNCTION_DECL, get_identifier (name), fntype);
37581 TREE_PUBLIC (new_fndecl) = 1;
37582 DECL_EXTERNAL (new_fndecl) = 1;
37583 DECL_IS_NOVOPS (new_fndecl) = 1;
37584 TREE_READONLY (new_fndecl) = 1;
37586 return new_fndecl;
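/* Illustrative sketch: the ACML mangling above produces names such as
   "__vrd2_sin" for BUILT_IN_SIN on 2-element DFmode vectors and
   "__vrs4_sinf" for BUILT_IN_SINF on 4-element SFmode vectors.  */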
37589 /* Returns a decl of a function that implements gather load with
37590 memory type MEM_VECTYPE, index type INDEX_TYPE and scale SCALE.
37591 Return NULL_TREE if it is not available. */
37593 static tree
37594 ix86_vectorize_builtin_gather (const_tree mem_vectype,
37595 const_tree index_type, int scale)
37597 bool si;
37598 enum ix86_builtins code;
37600 if (! TARGET_AVX2)
37601 return NULL_TREE;
37603 if ((TREE_CODE (index_type) != INTEGER_TYPE
37604 && !POINTER_TYPE_P (index_type))
37605 || (TYPE_MODE (index_type) != SImode
37606 && TYPE_MODE (index_type) != DImode))
37607 return NULL_TREE;
37609 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
37610 return NULL_TREE;
37612 /* v*gather* insn sign extends index to pointer mode. */
37613 if (TYPE_PRECISION (index_type) < POINTER_SIZE
37614 && TYPE_UNSIGNED (index_type))
37615 return NULL_TREE;
37617 if (scale <= 0
37618 || scale > 8
37619 || (scale & (scale - 1)) != 0)
37620 return NULL_TREE;
37622 si = TYPE_MODE (index_type) == SImode;
37623 switch (TYPE_MODE (mem_vectype))
37625 case V2DFmode:
37626 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
37627 break;
37628 case V4DFmode:
37629 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
37630 break;
37631 case V2DImode:
37632 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
37633 break;
37634 case V4DImode:
37635 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
37636 break;
37637 case V4SFmode:
37638 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
37639 break;
37640 case V8SFmode:
37641 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
37642 break;
37643 case V4SImode:
37644 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
37645 break;
37646 case V8SImode:
37647 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
37648 break;
37649 case V8DFmode:
37650 if (TARGET_AVX512F)
37651 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF;
37652 else
37653 return NULL_TREE;
37654 break;
37655 case V8DImode:
37656 if (TARGET_AVX512F)
37657 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI;
37658 else
37659 return NULL_TREE;
37660 break;
37661 case V16SFmode:
37662 if (TARGET_AVX512F)
37663 code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF;
37664 else
37665 return NULL_TREE;
37666 break;
37667 case V16SImode:
37668 if (TARGET_AVX512F)
37669 code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI;
37670 else
37671 return NULL_TREE;
37672 break;
37673 default:
37674 return NULL_TREE;
37677 return ix86_get_builtin (code);
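/* Illustrative sketch: if the vectorizer asks for a V4DF gather with an
   SImode index type and scale 8, the table above returns the
   GATHERALTSIV4DF builtin, whose expansion extracts the low V4SI half of
   its wider V8SI index operand.  */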
37680 /* Returns a decl for a target-specific builtin that implements
37681 the reciprocal of the function, or NULL_TREE if not available. */
37683 static tree
37684 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
37685 bool sqrt ATTRIBUTE_UNUSED)
37687 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
37688 && flag_finite_math_only && !flag_trapping_math
37689 && flag_unsafe_math_optimizations))
37690 return NULL_TREE;
37692 if (md_fn)
37693 /* Machine dependent builtins. */
37694 switch (fn)
37696 /* Vectorized version of sqrt to rsqrt conversion. */
37697 case IX86_BUILTIN_SQRTPS_NR:
37698 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
37700 case IX86_BUILTIN_SQRTPS_NR256:
37701 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
37703 default:
37704 return NULL_TREE;
37706 else
37707 /* Normal builtins. */
37708 switch (fn)
37710 /* Sqrt to rsqrt conversion. */
37711 case BUILT_IN_SQRTF:
37712 return ix86_get_builtin (IX86_BUILTIN_RSQRTF);
37714 default:
37715 return NULL_TREE;
37719 /* Helper for avx_vpermilps256_operand et al. This is also used by
37720 the expansion functions to turn the parallel back into a mask.
37721 The return value is 0 for no match and the imm8+1 for a match. */
37723 int
37724 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
37726 unsigned i, nelt = GET_MODE_NUNITS (mode);
37727 unsigned mask = 0;
37728 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
37730 if (XVECLEN (par, 0) != (int) nelt)
37731 return 0;
37733 /* Validate that all of the elements are constants, and not totally
37734 out of range. Copy the data into an integral array to make the
37735 subsequent checks easier. */
37736 for (i = 0; i < nelt; ++i)
37738 rtx er = XVECEXP (par, 0, i);
37739 unsigned HOST_WIDE_INT ei;
37741 if (!CONST_INT_P (er))
37742 return 0;
37743 ei = INTVAL (er);
37744 if (ei >= nelt)
37745 return 0;
37746 ipar[i] = ei;
37749 switch (mode)
37751 case V8DFmode:
37752 /* In the 512-bit DFmode case, we can only move elements within
37753 a 128-bit lane. First fill the second part of the mask,
37754 then fallthru. */
37755 for (i = 4; i < 6; ++i)
37757 if (ipar[i] < 4 || ipar[i] >= 6)
37758 return 0;
37759 mask |= (ipar[i] - 4) << i;
37761 for (i = 6; i < 8; ++i)
37763 if (ipar[i] < 6)
37764 return 0;
37765 mask |= (ipar[i] - 6) << i;
37767 /* FALLTHRU */
37769 case V4DFmode:
37770 /* In the 256-bit DFmode case, we can only move elements within
37771 a 128-bit lane. */
37772 for (i = 0; i < 2; ++i)
37774 if (ipar[i] >= 2)
37775 return 0;
37776 mask |= ipar[i] << i;
37778 for (i = 2; i < 4; ++i)
37780 if (ipar[i] < 2)
37781 return 0;
37782 mask |= (ipar[i] - 2) << i;
37784 break;
37786 case V16SFmode:
37787 /* In the 512-bit SFmode case, the permutation in the upper 256 bits
37788 must mirror the permutation in the lower 256 bits. */
37789 for (i = 0; i < 8; ++i)
37790 if (ipar[i] + 8 != ipar[i + 8])
37791 return 0;
37792 /* FALLTHRU */
37794 case V8SFmode:
37795 /* In the 256-bit SFmode case, we have full freedom of
37796 movement within the low 128-bit lane, but the high 128-bit
37797 lane must mirror the exact same pattern. */
37798 for (i = 0; i < 4; ++i)
37799 if (ipar[i] + 4 != ipar[i + 4])
37800 return 0;
37801 nelt = 4;
37802 /* FALLTHRU */
37804 case V2DFmode:
37805 case V4SFmode:
37806 /* In the 128-bit case, we have full freedom in the placement of
37807 the elements from the source operand. */
37808 for (i = 0; i < nelt; ++i)
37809 mask |= ipar[i] << (i * (nelt / 2));
37810 break;
37812 default:
37813 gcc_unreachable ();
37816 /* Make sure success has a non-zero value by adding one. */
37817 return mask + 1;
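/* Illustrative sketch: for the V4SF case the loop above packs two bits per
   element, so a parallel selecting elements (1 0 3 2) yields
   mask = 1 | (0 << 2) | (3 << 4) | (2 << 6) = 0xb1 and the function
   returns 0xb2 (imm8 + 1).  */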
37820 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
37821 the expansion functions to turn the parallel back into a mask.
37822 The return value is 0 for no match and the imm8+1 for a match. */
37824 int
37825 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
37827 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
37828 unsigned mask = 0;
37829 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
37831 if (XVECLEN (par, 0) != (int) nelt)
37832 return 0;
37834 /* Validate that all of the elements are constants, and not totally
37835 out of range. Copy the data into an integral array to make the
37836 subsequent checks easier. */
37837 for (i = 0; i < nelt; ++i)
37839 rtx er = XVECEXP (par, 0, i);
37840 unsigned HOST_WIDE_INT ei;
37842 if (!CONST_INT_P (er))
37843 return 0;
37844 ei = INTVAL (er);
37845 if (ei >= 2 * nelt)
37846 return 0;
37847 ipar[i] = ei;
37850 /* Validate that each half of the permute selects consecutive elements. */
37851 for (i = 0; i < nelt2 - 1; ++i)
37852 if (ipar[i] + 1 != ipar[i + 1])
37853 return 0;
37854 for (i = nelt2; i < nelt - 1; ++i)
37855 if (ipar[i] + 1 != ipar[i + 1])
37856 return 0;
37858 /* Reconstruct the mask. */
37859 for (i = 0; i < 2; ++i)
37861 unsigned e = ipar[i * nelt2];
37862 if (e % nelt2)
37863 return 0;
37864 e /= nelt2;
37865 mask |= e << (i * 4);
37868 /* Make sure success has a non-zero value by adding one. */
37869 return mask + 1;
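/* Illustrative sketch: for V4DFmode a parallel of (2 3 4 5) selects the
   high lane of the first operand and the low lane of the second, giving
   mask = (2 / 2) | ((4 / 2) << 4) = 0x21 and a return value of 0x22
   (imm8 + 1).  */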
37872 /* Return a register priority for hard reg REGNO. */
37873 static int
37874 ix86_register_priority (int hard_regno)
37876 /* ebp and r13 as the base always want a displacement, and r12 as the
37877 base always wants an index. So discourage their usage in an
37878 address. */
37879 if (hard_regno == R12_REG || hard_regno == R13_REG)
37880 return 0;
37881 if (hard_regno == BP_REG)
37882 return 1;
37883 /* New x86-64 int registers result in bigger code size. Discourage
37884 them. */
37885 if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
37886 return 2;
37887 /* New x86-64 SSE registers result in bigger code size. Discourage
37888 them. */
37889 if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
37890 return 2;
37891 /* Usage of AX register results in smaller code. Prefer it. */
37892 if (hard_regno == 0)
37893 return 4;
37894 return 3;
37897 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
37899 Put float CONST_DOUBLE in the constant pool instead of fp regs.
37900 QImode must go into class Q_REGS.
37901 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
37902 movdf to do mem-to-mem moves through integer regs. */
37904 static reg_class_t
37905 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
37907 enum machine_mode mode = GET_MODE (x);
37909 /* We're only allowed to return a subclass of CLASS. Many of the
37910 following checks fail for NO_REGS, so eliminate that early. */
37911 if (regclass == NO_REGS)
37912 return NO_REGS;
37914 /* All classes can load zeros. */
37915 if (x == CONST0_RTX (mode))
37916 return regclass;
37918 /* Force constants into memory if we are loading a (nonzero) constant into
37919 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
37920 instructions to load from a constant. */
37921 if (CONSTANT_P (x)
37922 && (MAYBE_MMX_CLASS_P (regclass)
37923 || MAYBE_SSE_CLASS_P (regclass)
37924 || MAYBE_MASK_CLASS_P (regclass)))
37925 return NO_REGS;
37927 /* Prefer SSE regs only, if we can use them for math. */
37928 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
37929 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
37931 /* Floating-point constants need more complex checks. */
37932 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
37934 /* General regs can load everything. */
37935 if (reg_class_subset_p (regclass, GENERAL_REGS))
37936 return regclass;
37938 /* Floats can load 0 and 1 plus some others. Note that we eliminated
37939 zero above. We only want to wind up preferring 80387 registers if
37940 we plan on doing computation with them. */
37941 if (TARGET_80387
37942 && standard_80387_constant_p (x) > 0)
37944 /* Limit class to non-sse. */
37945 if (regclass == FLOAT_SSE_REGS)
37946 return FLOAT_REGS;
37947 if (regclass == FP_TOP_SSE_REGS)
37948 return FP_TOP_REG;
37949 if (regclass == FP_SECOND_SSE_REGS)
37950 return FP_SECOND_REG;
37951 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
37952 return regclass;
37955 return NO_REGS;
37958 /* Generally when we see PLUS here, it's the function invariant
37959 (plus soft-fp const_int). Which can only be computed into general
37960 regs. */
37961 if (GET_CODE (x) == PLUS)
37962 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
37964 /* QImode constants are easy to load, but non-constant QImode data
37965 must go into Q_REGS. */
37966 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
37968 if (reg_class_subset_p (regclass, Q_REGS))
37969 return regclass;
37970 if (reg_class_subset_p (Q_REGS, regclass))
37971 return Q_REGS;
37972 return NO_REGS;
37975 return regclass;
37978 /* Discourage putting floating-point values in SSE registers unless
37979 SSE math is being used, and likewise for the 387 registers. */
37980 static reg_class_t
37981 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
37983 enum machine_mode mode = GET_MODE (x);
37985 /* Restrict the output reload class to the register bank that we are doing
37986 math on. If we would like not to return a subset of CLASS, reject this
37987 alternative: if reload cannot do this, it will still use its choice. */
37988 mode = GET_MODE (x);
37989 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
37990 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
37992 if (X87_FLOAT_MODE_P (mode))
37994 if (regclass == FP_TOP_SSE_REGS)
37995 return FP_TOP_REG;
37996 else if (regclass == FP_SECOND_SSE_REGS)
37997 return FP_SECOND_REG;
37998 else
37999 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
38002 return regclass;
38005 static reg_class_t
38006 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
38007 enum machine_mode mode, secondary_reload_info *sri)
38009 /* Double-word spills from general registers to non-offsettable memory
38010 references (zero-extended addresses) require special handling. */
38011 if (TARGET_64BIT
38012 && MEM_P (x)
38013 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
38014 && INTEGER_CLASS_P (rclass)
38015 && !offsettable_memref_p (x))
38017 sri->icode = (in_p
38018 ? CODE_FOR_reload_noff_load
38019 : CODE_FOR_reload_noff_store);
38020 /* Add the cost of moving address to a temporary. */
38021 sri->extra_cost = 1;
38023 return NO_REGS;
38026 /* QImode spills from non-QI registers require
38027 an intermediate register on 32-bit targets. */
38028 if (mode == QImode
38029 && (MAYBE_MASK_CLASS_P (rclass)
38030 || (!TARGET_64BIT && !in_p
38031 && INTEGER_CLASS_P (rclass)
38032 && MAYBE_NON_Q_CLASS_P (rclass))))
38034 int regno;
38036 if (REG_P (x))
38037 regno = REGNO (x);
38038 else
38039 regno = -1;
38041 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
38042 regno = true_regnum (x);
38044 /* Return Q_REGS if the operand is in memory. */
38045 if (regno == -1)
38046 return Q_REGS;
38049 /* This condition handles the corner case where an expression involving
38050 pointers gets vectorized. We're trying to use the address of a
38051 stack slot as a vector initializer.
38053 (set (reg:V2DI 74 [ vect_cst_.2 ])
38054 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
38056 Eventually frame gets turned into sp+offset like this:
38058 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
38059 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
38060 (const_int 392 [0x188]))))
38062 That later gets turned into:
38064 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
38065 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
38066 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
38068 We'll have the following reload recorded:
38070 Reload 0: reload_in (DI) =
38071 (plus:DI (reg/f:DI 7 sp)
38072 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
38073 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
38074 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
38075 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
38076 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
38077 reload_reg_rtx: (reg:V2DI 22 xmm1)
38079 Which isn't going to work since SSE instructions can't handle scalar
38080 additions. Returning GENERAL_REGS forces the addition into an integer
38081 register and reload can handle subsequent reloads without problems. */
38083 if (in_p && GET_CODE (x) == PLUS
38084 && SSE_CLASS_P (rclass)
38085 && SCALAR_INT_MODE_P (mode))
38086 return GENERAL_REGS;
38088 return NO_REGS;
38091 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
38093 static bool
38094 ix86_class_likely_spilled_p (reg_class_t rclass)
38096 switch (rclass)
38098 case AREG:
38099 case DREG:
38100 case CREG:
38101 case BREG:
38102 case AD_REGS:
38103 case SIREG:
38104 case DIREG:
38105 case SSE_FIRST_REG:
38106 case FP_TOP_REG:
38107 case FP_SECOND_REG:
38108 case BND_REGS:
38109 return true;
38111 default:
38112 break;
38115 return false;
38118 /* If we are copying between general and FP registers, we need a memory
38119 location. The same is true for SSE and MMX registers.
38121 To optimize register_move_cost performance, allow inline variant.
38123 The macro can't work reliably when one of the CLASSES is a class containing
38124 registers from multiple units (SSE, MMX, integer). We avoid this by never
38125 combining those units in a single alternative in the machine description.
38126 Ensure that this constraint holds to avoid unexpected surprises.
38128 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
38129 enforce these sanity checks. */
38131 static inline bool
38132 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
38133 enum machine_mode mode, int strict)
38135 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
38136 return false;
38137 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
38138 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
38139 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
38140 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
38141 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
38142 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
38144 gcc_assert (!strict || lra_in_progress);
38145 return true;
38148 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
38149 return true;
38151 /* ??? This is a lie. We do have moves between mmx/general, and for
38152 mmx/sse2. But by saying we need secondary memory we discourage the
38153 register allocator from using the mmx registers unless needed. */
38154 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
38155 return true;
38157 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
38159 /* SSE1 doesn't have any direct moves from other classes. */
38160 if (!TARGET_SSE2)
38161 return true;
38163 /* If the target says that inter-unit moves are more expensive
38164 than moving through memory, then don't generate them. */
38165 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
38166 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
38167 return true;
38169 /* Between SSE and general, we have moves no larger than word size. */
38170 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38171 return true;
38174 return false;
38177 bool
38178 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
38179 enum machine_mode mode, int strict)
38181 return inline_secondary_memory_needed (class1, class2, mode, strict);
38184 /* Implement the TARGET_CLASS_MAX_NREGS hook.
38186 On the 80386, this is the size of MODE in words,
38187 except in the FP regs, where a single reg is always enough. */
38189 static unsigned char
38190 ix86_class_max_nregs (reg_class_t rclass, enum machine_mode mode)
38192 if (MAYBE_INTEGER_CLASS_P (rclass))
38194 if (mode == XFmode)
38195 return (TARGET_64BIT ? 2 : 3);
38196 else if (mode == XCmode)
38197 return (TARGET_64BIT ? 4 : 6);
38198 else
38199 return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
38201 else
38203 if (COMPLEX_MODE_P (mode))
38204 return 2;
38205 else
38206 return 1;
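/* Illustrative example (added): on a 32-bit target an XFmode value held
   in GENERAL_REGS needs 3 word-sized registers per the integer branch
   above, while the same value in FLOAT_REGS occupies a single x87
   register, which is what the non-integer branch returns.  */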
38210 /* Return true if the registers in CLASS cannot represent the change from
38211 modes FROM to TO. */
38213 bool
38214 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
38215 enum reg_class regclass)
38217 if (from == to)
38218 return false;
38220 /* x87 registers can't do subreg at all, as all values are reformatted
38221 to extended precision. */
38222 if (MAYBE_FLOAT_CLASS_P (regclass))
38223 return true;
38225 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
38227 /* Vector registers do not support QI or HImode loads. If we don't
38228 disallow a change to these modes, reload will assume it's ok to
38229 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
38230 the vec_dupv4hi pattern. */
38231 if (GET_MODE_SIZE (from) < 4)
38232 return true;
38234 /* Vector registers do not support subreg with nonzero offsets, which
38235 are otherwise valid for integer registers. Since we can't see
38236 whether we have a nonzero offset from here, prohibit all
38237 nonparadoxical subregs changing size. */
38238 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
38239 return true;
38242 return false;
38245 /* Return the cost of moving data of mode M between a
38246 register and memory. A value of 2 is the default; this cost is
38247 relative to those in `REGISTER_MOVE_COST'.
38249 This function is used extensively by register_move_cost, which is used to
38250 build tables at startup. Make it inline in this case.
38251 When IN is 2, return maximum of in and out move cost.
38253 If moving between registers and memory is more expensive than
38254 between two registers, you should define this macro to express the
38255 relative cost.
38257 Also model the increased cost of moving QImode registers in non-Q_REGS
38258 classes.
38260 static inline int
38261 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
38262 int in)
38264 int cost;
38265 if (FLOAT_CLASS_P (regclass))
38267 int index;
38268 switch (mode)
38270 case SFmode:
38271 index = 0;
38272 break;
38273 case DFmode:
38274 index = 1;
38275 break;
38276 case XFmode:
38277 index = 2;
38278 break;
38279 default:
38280 return 100;
38282 if (in == 2)
38283 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
38284 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
38286 if (SSE_CLASS_P (regclass))
38288 int index;
38289 switch (GET_MODE_SIZE (mode))
38291 case 4:
38292 index = 0;
38293 break;
38294 case 8:
38295 index = 1;
38296 break;
38297 case 16:
38298 index = 2;
38299 break;
38300 default:
38301 return 100;
38303 if (in == 2)
38304 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
38305 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
38307 if (MMX_CLASS_P (regclass))
38309 int index;
38310 switch (GET_MODE_SIZE (mode))
38312 case 4:
38313 index = 0;
38314 break;
38315 case 8:
38316 index = 1;
38317 break;
38318 default:
38319 return 100;
38321 if (in)
38322 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
38323 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
38325 switch (GET_MODE_SIZE (mode))
38327 case 1:
38328 if (Q_CLASS_P (regclass) || TARGET_64BIT)
38330 if (!in)
38331 return ix86_cost->int_store[0];
38332 if (TARGET_PARTIAL_REG_DEPENDENCY
38333 && optimize_function_for_speed_p (cfun))
38334 cost = ix86_cost->movzbl_load;
38335 else
38336 cost = ix86_cost->int_load[0];
38337 if (in == 2)
38338 return MAX (cost, ix86_cost->int_store[0]);
38339 return cost;
38341 else
38343 if (in == 2)
38344 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
38345 if (in)
38346 return ix86_cost->movzbl_load;
38347 else
38348 return ix86_cost->int_store[0] + 4;
38350 break;
38351 case 2:
38352 if (in == 2)
38353 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
38354 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
38355 default:
38356 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
38357 if (mode == TFmode)
38358 mode = XFmode;
38359 if (in == 2)
38360 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
38361 else if (in)
38362 cost = ix86_cost->int_load[2];
38363 else
38364 cost = ix86_cost->int_store[2];
38365 return (cost * (((int) GET_MODE_SIZE (mode)
38366 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
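/* Illustrative example (added): on a 32-bit target (UNITS_PER_WORD == 4)
   a DImode load into a general register takes the default branch above,
   so the returned cost is ix86_cost->int_load[2] scaled by
   (8 + 4 - 1) / 4 == 2 word-sized moves.  */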
38370 static int
38371 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
38372 bool in)
38374 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
38378 /* Return the cost of moving data from a register in class CLASS1 to
38379 one in class CLASS2.
38381 It is not required that the cost always equal 2 when FROM is the same as TO;
38382 on some machines it is expensive to move between registers if they are not
38383 general registers. */
38385 static int
38386 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
38387 reg_class_t class2_i)
38389 enum reg_class class1 = (enum reg_class) class1_i;
38390 enum reg_class class2 = (enum reg_class) class2_i;
38392 /* In case we require secondary memory, compute the cost of the store followed
38393 by a load. In order to avoid bad register allocation choices, we need
38394 this to be *at least* as high as the symmetric MEMORY_MOVE_COST.
38396 if (inline_secondary_memory_needed (class1, class2, mode, 0))
38398 int cost = 1;
38400 cost += inline_memory_move_cost (mode, class1, 2);
38401 cost += inline_memory_move_cost (mode, class2, 2);
38403 /* In the case of copying from a general purpose register we may emit multiple
38404 stores followed by a single load, causing a memory size mismatch stall.
38405 Count this as an arbitrarily high cost of 20. */
38406 if (targetm.class_max_nregs (class1, mode)
38407 > targetm.class_max_nregs (class2, mode))
38408 cost += 20;
38410 /* In the case of FP/MMX moves, the registers actually overlap, and we
38411 have to switch modes in order to treat them differently. */
38412 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
38413 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
38414 cost += 20;
38416 return cost;
38419 /* Moves between SSE/MMX and integer unit are expensive. */
38420 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
38421 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
38423 /* ??? By keeping returned value relatively high, we limit the number
38424 of moves between integer and MMX/SSE registers for all targets.
38425 Additionally, high value prevents problem with x86_modes_tieable_p(),
38426 where integer modes in MMX/SSE registers are not tieable
38427 because of missing QImode and HImode moves to, from or between
38428 MMX/SSE registers. */
38429 return MAX (8, ix86_cost->mmxsse_to_integer);
38431 if (MAYBE_FLOAT_CLASS_P (class1))
38432 return ix86_cost->fp_move;
38433 if (MAYBE_SSE_CLASS_P (class1))
38434 return ix86_cost->sse_move;
38435 if (MAYBE_MMX_CLASS_P (class1))
38436 return ix86_cost->mmx_move;
38437 return 2;
38440 /* Return TRUE if hard register REGNO can hold a value of machine-mode
38441 MODE. */
38443 bool
38444 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
38446 /* The flags register can hold only CCmode values, and only it can hold them. */
38447 if (CC_REGNO_P (regno))
38448 return GET_MODE_CLASS (mode) == MODE_CC;
38449 if (GET_MODE_CLASS (mode) == MODE_CC
38450 || GET_MODE_CLASS (mode) == MODE_RANDOM
38451 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
38452 return false;
38453 if (STACK_REGNO_P (regno))
38454 return VALID_FP_MODE_P (mode);
38455 if (MASK_REGNO_P (regno))
38456 return VALID_MASK_REG_MODE (mode);
38457 if (BND_REGNO_P (regno))
38458 return VALID_BND_REG_MODE (mode);
38459 if (SSE_REGNO_P (regno))
38461 /* We implement the move patterns for all vector modes into and
38462 out of SSE registers, even when no operation instructions
38463 are available. */
38465 /* For AVX-512 we allow, regardless of regno:
38466 - XI mode
38467 - any of 512-bit wide vector mode
38468 - any scalar mode. */
38469 if (TARGET_AVX512F
38470 && (mode == XImode
38471 || VALID_AVX512F_REG_MODE (mode)
38472 || VALID_AVX512F_SCALAR_MODE (mode)))
38473 return true;
38475 /* xmm16-xmm31 are only available for AVX-512. */
38476 if (EXT_REX_SSE_REGNO_P (regno))
38477 return false;
38479 /* OImode and AVX modes are available only when AVX is enabled. */
38480 return ((TARGET_AVX
38481 && VALID_AVX256_REG_OR_OI_MODE (mode))
38482 || VALID_SSE_REG_MODE (mode)
38483 || VALID_SSE2_REG_MODE (mode)
38484 || VALID_MMX_REG_MODE (mode)
38485 || VALID_MMX_REG_MODE_3DNOW (mode));
38487 if (MMX_REGNO_P (regno))
38489 /* We implement the move patterns for 3DNOW modes even in MMX mode,
38490 so if the register is available at all, then we can move data of
38491 the given mode into or out of it. */
38492 return (VALID_MMX_REG_MODE (mode)
38493 || VALID_MMX_REG_MODE_3DNOW (mode));
38496 if (mode == QImode)
38498 /* Take care with QImode values - they can live in non-QI regs,
38499 but then they cause partial register stalls. */
38500 if (ANY_QI_REGNO_P (regno))
38501 return true;
38502 if (!TARGET_PARTIAL_REG_STALL)
38503 return true;
38504 /* LRA checks if the hard register is OK for the given mode.
38505 QImode values can live in non-QI regs, so we allow all
38506 registers here. */
38507 if (lra_in_progress)
38508 return true;
38509 return !can_create_pseudo_p ();
38511 /* We handle both integer and floats in the general purpose registers. */
38512 else if (VALID_INT_MODE_P (mode))
38513 return true;
38514 else if (VALID_FP_MODE_P (mode))
38515 return true;
38516 else if (VALID_DFP_MODE_P (mode))
38517 return true;
38518 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
38519 on to use that value in smaller contexts, this can easily force a
38520 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
38521 supporting DImode, allow it. */
38522 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
38523 return true;
38525 return false;
38528 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
38529 tieable integer mode. */
38531 static bool
38532 ix86_tieable_integer_mode_p (enum machine_mode mode)
38534 switch (mode)
38536 case HImode:
38537 case SImode:
38538 return true;
38540 case QImode:
38541 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
38543 case DImode:
38544 return TARGET_64BIT;
38546 default:
38547 return false;
38551 /* Return true if MODE1 is accessible in a register that can hold MODE2
38552 without copying. That is, all register classes that can hold MODE2
38553 can also hold MODE1. */
38555 bool
38556 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
38558 if (mode1 == mode2)
38559 return true;
38561 if (ix86_tieable_integer_mode_p (mode1)
38562 && ix86_tieable_integer_mode_p (mode2))
38563 return true;
38565 /* MODE2 being XFmode implies fp stack or general regs, which means we
38566 can tie any smaller floating point modes to it. Note that we do not
38567 tie this with TFmode. */
38568 if (mode2 == XFmode)
38569 return mode1 == SFmode || mode1 == DFmode;
38571 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
38572 that we can tie it with SFmode. */
38573 if (mode2 == DFmode)
38574 return mode1 == SFmode;
38576 /* If MODE2 is only appropriate for an SSE register, then tie with
38577 any other mode acceptable to SSE registers. */
38578 if (GET_MODE_SIZE (mode2) == 32
38579 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
38580 return (GET_MODE_SIZE (mode1) == 32
38581 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
38582 if (GET_MODE_SIZE (mode2) == 16
38583 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
38584 return (GET_MODE_SIZE (mode1) == 16
38585 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
38587 /* If MODE2 is appropriate for an MMX register, then tie
38588 with any other mode acceptable to MMX registers. */
38589 if (GET_MODE_SIZE (mode2) == 8
38590 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
38591 return (GET_MODE_SIZE (mode1) == 8
38592 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
38594 return false;
38597 /* Return the cost of moving between two registers of mode MODE. */
38599 static int
38600 ix86_set_reg_reg_cost (enum machine_mode mode)
38602 unsigned int units = UNITS_PER_WORD;
38604 switch (GET_MODE_CLASS (mode))
38606 default:
38607 break;
38609 case MODE_CC:
38610 units = GET_MODE_SIZE (CCmode);
38611 break;
38613 case MODE_FLOAT:
38614 if ((TARGET_SSE && mode == TFmode)
38615 || (TARGET_80387 && mode == XFmode)
38616 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
38617 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
38618 units = GET_MODE_SIZE (mode);
38619 break;
38621 case MODE_COMPLEX_FLOAT:
38622 if ((TARGET_SSE && mode == TCmode)
38623 || (TARGET_80387 && mode == XCmode)
38624 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
38625 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
38626 units = GET_MODE_SIZE (mode);
38627 break;
38629 case MODE_VECTOR_INT:
38630 case MODE_VECTOR_FLOAT:
38631 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
38632 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
38633 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
38634 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
38635 || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
38636 units = GET_MODE_SIZE (mode);
38639 /* Return the cost of moving between two registers of mode MODE,
38640 assuming that the move will be in pieces of at most UNITS bytes. */
38641 return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
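/* Illustrative example (added): with TARGET_SSE2, a V4SImode
   register-to-register copy sets units == GET_MODE_SIZE (V4SImode) == 16,
   so the result is COSTS_N_INSNS (1).  Were the vector mode unsupported,
   units would stay at UNITS_PER_WORD (4 on a 32-bit target) and the same
   copy would be costed as COSTS_N_INSNS (4).  */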
38644 /* Compute a (partial) cost for rtx X. Return true if the complete
38645 cost has been computed, and false if subexpressions should be
38646 scanned. In either case, *TOTAL contains the cost result. */
38648 static bool
38649 ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total,
38650 bool speed)
38652 rtx mask;
38653 enum rtx_code code = (enum rtx_code) code_i;
38654 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
38655 enum machine_mode mode = GET_MODE (x);
38656 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
38658 switch (code)
38660 case SET:
38661 if (register_operand (SET_DEST (x), VOIDmode)
38662 && reg_or_0_operand (SET_SRC (x), VOIDmode))
38664 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
38665 return true;
38667 return false;
38669 case CONST_INT:
38670 case CONST:
38671 case LABEL_REF:
38672 case SYMBOL_REF:
38673 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
38674 *total = 3;
38675 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
38676 *total = 2;
38677 else if (flag_pic && SYMBOLIC_CONST (x)
38678 && (!TARGET_64BIT
38679 || (GET_CODE (x) != LABEL_REF
38680 && (GET_CODE (x) != SYMBOL_REF
38681 || !SYMBOL_REF_LOCAL_P (x)))))
38682 *total = 1;
38683 else
38684 *total = 0;
38685 return true;
38687 case CONST_DOUBLE:
38688 if (mode == VOIDmode)
38690 *total = 0;
38691 return true;
38693 switch (standard_80387_constant_p (x))
38695 case 1: /* 0.0 */
38696 *total = 1;
38697 return true;
38698 default: /* Other constants */
38699 *total = 2;
38700 return true;
38701 case 0:
38702 case -1:
38703 break;
38705 if (SSE_FLOAT_MODE_P (mode))
38707 case CONST_VECTOR:
38708 switch (standard_sse_constant_p (x))
38710 case 0:
38711 break;
38712 case 1: /* 0: xor eliminates false dependency */
38713 *total = 0;
38714 return true;
38715 default: /* -1: cmp contains false dependency */
38716 *total = 1;
38717 return true;
38720 /* Fall back to (MEM (SYMBOL_REF)), since that's where
38721 it'll probably end up. Add a penalty for size. */
38722 *total = (COSTS_N_INSNS (1)
38723 + (flag_pic != 0 && !TARGET_64BIT)
38724 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
38725 return true;
38727 case ZERO_EXTEND:
38728 /* The zero extension is often completely free on x86_64, so make
38729 it as cheap as possible. */
38730 if (TARGET_64BIT && mode == DImode
38731 && GET_MODE (XEXP (x, 0)) == SImode)
38732 *total = 1;
38733 else if (TARGET_ZERO_EXTEND_WITH_AND)
38734 *total = cost->add;
38735 else
38736 *total = cost->movzx;
38737 return false;
38739 case SIGN_EXTEND:
38740 *total = cost->movsx;
38741 return false;
38743 case ASHIFT:
38744 if (SCALAR_INT_MODE_P (mode)
38745 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
38746 && CONST_INT_P (XEXP (x, 1)))
38748 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
38749 if (value == 1)
38751 *total = cost->add;
38752 return false;
38754 if ((value == 2 || value == 3)
38755 && cost->lea <= cost->shift_const)
38757 *total = cost->lea;
38758 return false;
38761 /* FALLTHRU */
38763 case ROTATE:
38764 case ASHIFTRT:
38765 case LSHIFTRT:
38766 case ROTATERT:
38767 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
38769 /* ??? Should be SSE vector operation cost. */
38770 /* At least for published AMD latencies, this really is the same
38771 as the latency for a simple fpu operation like fabs. */
38772 /* V*QImode is emulated with 1-11 insns. */
38773 if (mode == V16QImode || mode == V32QImode)
38775 int count = 11;
38776 if (TARGET_XOP && mode == V16QImode)
38778 /* For XOP we use vpshab, which requires a broadcast of the
38779 value to the variable shift insn. For constants this
38780 means a V16Q const in mem; even when we can perform the
38781 shift with one insn set the cost to prefer paddb. */
38782 if (CONSTANT_P (XEXP (x, 1)))
38784 *total = (cost->fabs
38785 + rtx_cost (XEXP (x, 0), code, 0, speed)
38786 + (speed ? 2 : COSTS_N_BYTES (16)));
38787 return true;
38789 count = 3;
38791 else if (TARGET_SSSE3)
38792 count = 7;
38793 *total = cost->fabs * count;
38795 else
38796 *total = cost->fabs;
38798 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38800 if (CONST_INT_P (XEXP (x, 1)))
38802 if (INTVAL (XEXP (x, 1)) > 32)
38803 *total = cost->shift_const + COSTS_N_INSNS (2);
38804 else
38805 *total = cost->shift_const * 2;
38807 else
38809 if (GET_CODE (XEXP (x, 1)) == AND)
38810 *total = cost->shift_var * 2;
38811 else
38812 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
38815 else
38817 if (CONST_INT_P (XEXP (x, 1)))
38818 *total = cost->shift_const;
38819 else if (GET_CODE (XEXP (x, 1)) == SUBREG
38820 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
38822 /* Return the cost after shift-and truncation. */
38823 *total = cost->shift_var;
38824 return true;
38826 else
38827 *total = cost->shift_var;
38829 return false;
38831 case FMA:
38833 rtx sub;
38835 gcc_assert (FLOAT_MODE_P (mode));
38836 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
38838 /* ??? SSE scalar/vector cost should be used here. */
38839 /* ??? Bald assumption that fma has the same cost as fmul. */
38840 *total = cost->fmul;
38841 *total += rtx_cost (XEXP (x, 1), FMA, 1, speed);
38843 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
38844 sub = XEXP (x, 0);
38845 if (GET_CODE (sub) == NEG)
38846 sub = XEXP (sub, 0);
38847 *total += rtx_cost (sub, FMA, 0, speed);
38849 sub = XEXP (x, 2);
38850 if (GET_CODE (sub) == NEG)
38851 sub = XEXP (sub, 0);
38852 *total += rtx_cost (sub, FMA, 2, speed);
38853 return true;
38856 case MULT:
38857 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38859 /* ??? SSE scalar cost should be used here. */
38860 *total = cost->fmul;
38861 return false;
38863 else if (X87_FLOAT_MODE_P (mode))
38865 *total = cost->fmul;
38866 return false;
38868 else if (FLOAT_MODE_P (mode))
38870 /* ??? SSE vector cost should be used here. */
38871 *total = cost->fmul;
38872 return false;
38874 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
38876 /* V*QImode is emulated with 7-13 insns. */
38877 if (mode == V16QImode || mode == V32QImode)
38879 int extra = 11;
38880 if (TARGET_XOP && mode == V16QImode)
38881 extra = 5;
38882 else if (TARGET_SSSE3)
38883 extra = 6;
38884 *total = cost->fmul * 2 + cost->fabs * extra;
38886 /* V*DImode is emulated with 5-8 insns. */
38887 else if (mode == V2DImode || mode == V4DImode)
38889 if (TARGET_XOP && mode == V2DImode)
38890 *total = cost->fmul * 2 + cost->fabs * 3;
38891 else
38892 *total = cost->fmul * 3 + cost->fabs * 5;
38894 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
38895 insns, including two PMULUDQ. */
38896 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
38897 *total = cost->fmul * 2 + cost->fabs * 5;
38898 else
38899 *total = cost->fmul;
38900 return false;
38902 else
38904 rtx op0 = XEXP (x, 0);
38905 rtx op1 = XEXP (x, 1);
38906 int nbits;
38907 if (CONST_INT_P (XEXP (x, 1)))
38909 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
38910 for (nbits = 0; value != 0; value &= value - 1)
38911 nbits++;
38913 else
38914 /* This is arbitrary. */
38915 nbits = 7;
38917 /* Compute costs correctly for widening multiplication. */
38918 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
38919 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
38920 == GET_MODE_SIZE (mode))
38922 int is_mulwiden = 0;
38923 enum machine_mode inner_mode = GET_MODE (op0);
38925 if (GET_CODE (op0) == GET_CODE (op1))
38926 is_mulwiden = 1, op1 = XEXP (op1, 0);
38927 else if (CONST_INT_P (op1))
38929 if (GET_CODE (op0) == SIGN_EXTEND)
38930 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
38931 == INTVAL (op1);
38932 else
38933 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
38936 if (is_mulwiden)
38937 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
38940 *total = (cost->mult_init[MODE_INDEX (mode)]
38941 + nbits * cost->mult_bit
38942 + rtx_cost (op0, outer_code, opno, speed)
38943 + rtx_cost (op1, outer_code, opno, speed));
38945 return true;
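/* Illustrative example (added): multiplying by the SImode constant 10
   (binary 1010) makes the popcount loop above set nbits == 2, so the
   charge is cost->mult_init[MODE_INDEX (SImode)] + 2 * cost->mult_bit
   plus the costs of the two operands.  */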
38948 case DIV:
38949 case UDIV:
38950 case MOD:
38951 case UMOD:
38952 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38953 /* ??? SSE cost should be used here. */
38954 *total = cost->fdiv;
38955 else if (X87_FLOAT_MODE_P (mode))
38956 *total = cost->fdiv;
38957 else if (FLOAT_MODE_P (mode))
38958 /* ??? SSE vector cost should be used here. */
38959 *total = cost->fdiv;
38960 else
38961 *total = cost->divide[MODE_INDEX (mode)];
38962 return false;
38964 case PLUS:
38965 if (GET_MODE_CLASS (mode) == MODE_INT
38966 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
38968 if (GET_CODE (XEXP (x, 0)) == PLUS
38969 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
38970 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
38971 && CONSTANT_P (XEXP (x, 1)))
38973 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
38974 if (val == 2 || val == 4 || val == 8)
38976 *total = cost->lea;
38977 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
38978 outer_code, opno, speed);
38979 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
38980 outer_code, opno, speed);
38981 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38982 return true;
38985 else if (GET_CODE (XEXP (x, 0)) == MULT
38986 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
38988 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
38989 if (val == 2 || val == 4 || val == 8)
38991 *total = cost->lea;
38992 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
38993 outer_code, opno, speed);
38994 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38995 return true;
38998 else if (GET_CODE (XEXP (x, 0)) == PLUS)
39000 *total = cost->lea;
39001 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
39002 outer_code, opno, speed);
39003 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
39004 outer_code, opno, speed);
39005 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
39006 return true;
39009 /* FALLTHRU */
39011 case MINUS:
39012 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
39014 /* ??? SSE cost should be used here. */
39015 *total = cost->fadd;
39016 return false;
39018 else if (X87_FLOAT_MODE_P (mode))
39020 *total = cost->fadd;
39021 return false;
39023 else if (FLOAT_MODE_P (mode))
39025 /* ??? SSE vector cost should be used here. */
39026 *total = cost->fadd;
39027 return false;
39029 /* FALLTHRU */
39031 case AND:
39032 case IOR:
39033 case XOR:
39034 if (GET_MODE_CLASS (mode) == MODE_INT
39035 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
39037 *total = (cost->add * 2
39038 + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
39039 << (GET_MODE (XEXP (x, 0)) != DImode))
39040 + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
39041 << (GET_MODE (XEXP (x, 1)) != DImode)));
39042 return true;
39044 /* FALLTHRU */
39046 case NEG:
39047 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
39049 /* ??? SSE cost should be used here. */
39050 *total = cost->fchs;
39051 return false;
39053 else if (X87_FLOAT_MODE_P (mode))
39055 *total = cost->fchs;
39056 return false;
39058 else if (FLOAT_MODE_P (mode))
39060 /* ??? SSE vector cost should be used here. */
39061 *total = cost->fchs;
39062 return false;
39064 /* FALLTHRU */
39066 case NOT:
39067 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
39069 /* ??? Should be SSE vector operation cost. */
39070 /* At least for published AMD latencies, this really is the same
39071 as the latency for a simple fpu operation like fabs. */
39072 *total = cost->fabs;
39074 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
39075 *total = cost->add * 2;
39076 else
39077 *total = cost->add;
39078 return false;
39080 case COMPARE:
39081 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
39082 && XEXP (XEXP (x, 0), 1) == const1_rtx
39083 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
39084 && XEXP (x, 1) == const0_rtx)
39086 /* This kind of construct is implemented using test[bwl].
39087 Treat it as if we had an AND. */
39088 *total = (cost->add
39089 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
39090 + rtx_cost (const1_rtx, outer_code, opno, speed));
39091 return true;
39093 return false;
39095 case FLOAT_EXTEND:
39096 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
39097 *total = 0;
39098 return false;
39100 case ABS:
39101 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
39102 /* ??? SSE cost should be used here. */
39103 *total = cost->fabs;
39104 else if (X87_FLOAT_MODE_P (mode))
39105 *total = cost->fabs;
39106 else if (FLOAT_MODE_P (mode))
39107 /* ??? SSE vector cost should be used here. */
39108 *total = cost->fabs;
39109 return false;
39111 case SQRT:
39112 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
39113 /* ??? SSE cost should be used here. */
39114 *total = cost->fsqrt;
39115 else if (X87_FLOAT_MODE_P (mode))
39116 *total = cost->fsqrt;
39117 else if (FLOAT_MODE_P (mode))
39118 /* ??? SSE vector cost should be used here. */
39119 *total = cost->fsqrt;
39120 return false;
39122 case UNSPEC:
39123 if (XINT (x, 1) == UNSPEC_TP)
39124 *total = 0;
39125 return false;
39127 case VEC_SELECT:
39128 case VEC_CONCAT:
39129 case VEC_DUPLICATE:
39130 /* ??? Assume all of these vector manipulation patterns are
39131 recognizable. In which case they all pretty much have the
39132 same cost. */
39133 *total = cost->fabs;
39134 return true;
39135 case VEC_MERGE:
39136 mask = XEXP (x, 2);
39137 /* This is a masked instruction; assume the same cost
39138 as the non-masked variant. */
39139 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
39140 *total = rtx_cost (XEXP (x, 0), outer_code, opno, speed);
39141 else
39142 *total = cost->fabs;
39143 return true;
39145 default:
39146 return false;
39150 #if TARGET_MACHO
39152 static int current_machopic_label_num;
39154 /* Given a symbol name and its associated stub, write out the
39155 definition of the stub. */
39157 void
39158 machopic_output_stub (FILE *file, const char *symb, const char *stub)
39160 unsigned int length;
39161 char *binder_name, *symbol_name, lazy_ptr_name[32];
39162 int label = ++current_machopic_label_num;
39164 /* For 64-bit we shouldn't get here. */
39165 gcc_assert (!TARGET_64BIT);
39167 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
39168 symb = targetm.strip_name_encoding (symb);
39170 length = strlen (stub);
39171 binder_name = XALLOCAVEC (char, length + 32);
39172 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
39174 length = strlen (symb);
39175 symbol_name = XALLOCAVEC (char, length + 32);
39176 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
39178 sprintf (lazy_ptr_name, "L%d$lz", label);
39180 if (MACHOPIC_ATT_STUB)
39181 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
39182 else if (MACHOPIC_PURE)
39183 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
39184 else
39185 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
39187 fprintf (file, "%s:\n", stub);
39188 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
39190 if (MACHOPIC_ATT_STUB)
39192 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
39194 else if (MACHOPIC_PURE)
39196 /* PIC stub. */
39197 /* 25-byte PIC stub using "CALL get_pc_thunk". */
39198 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
39199 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
39200 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
39201 label, lazy_ptr_name, label);
39202 fprintf (file, "\tjmp\t*%%ecx\n");
39204 else
39205 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
39207 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
39208 it needs no stub-binding-helper. */
39209 if (MACHOPIC_ATT_STUB)
39210 return;
39212 fprintf (file, "%s:\n", binder_name);
39214 if (MACHOPIC_PURE)
39216 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
39217 fprintf (file, "\tpushl\t%%ecx\n");
39219 else
39220 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
39222 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
39224 /* N.B. Keep the correspondence of these
39225 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
39226 old-pic/new-pic/non-pic stubs; altering this will break
39227 compatibility with existing dylibs. */
39228 if (MACHOPIC_PURE)
39230 /* 25-byte PIC stub using "CALL get_pc_thunk". */
39231 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
39233 else
39234 /* 16-byte -mdynamic-no-pic stub. */
39235 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
39237 fprintf (file, "%s:\n", lazy_ptr_name);
39238 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
39239 fprintf (file, ASM_LONG "%s\n", binder_name);
39241 #endif /* TARGET_MACHO */
39243 /* Order the registers for register allocator. */
39245 void
39246 x86_order_regs_for_local_alloc (void)
39248 int pos = 0;
39249 int i;
39251 /* First allocate the local general purpose registers. */
39252 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
39253 if (GENERAL_REGNO_P (i) && call_used_regs[i])
39254 reg_alloc_order [pos++] = i;
39256 /* Global general purpose registers. */
39257 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
39258 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
39259 reg_alloc_order [pos++] = i;
39261 /* x87 registers come first in case we are doing FP math
39262 using them. */
39263 if (!TARGET_SSE_MATH)
39264 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
39265 reg_alloc_order [pos++] = i;
39267 /* SSE registers. */
39268 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
39269 reg_alloc_order [pos++] = i;
39270 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
39271 reg_alloc_order [pos++] = i;
39273 /* Extended REX SSE registers. */
39274 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
39275 reg_alloc_order [pos++] = i;
39277 /* Mask register. */
39278 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
39279 reg_alloc_order [pos++] = i;
39281 /* MPX bound registers. */
39282 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
39283 reg_alloc_order [pos++] = i;
39285 /* x87 registers. */
39286 if (TARGET_SSE_MATH)
39287 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
39288 reg_alloc_order [pos++] = i;
39290 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
39291 reg_alloc_order [pos++] = i;
39293 /* Initialize the rest of array as we do not allocate some registers
39294 at all. */
39295 while (pos < FIRST_PSEUDO_REGISTER)
39296 reg_alloc_order [pos++] = 0;
39299 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
39300 in struct attribute_spec handler. */
39301 static tree
39302 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
39303 tree args,
39304 int flags ATTRIBUTE_UNUSED,
39305 bool *no_add_attrs)
39307 if (TREE_CODE (*node) != FUNCTION_TYPE
39308 && TREE_CODE (*node) != METHOD_TYPE
39309 && TREE_CODE (*node) != FIELD_DECL
39310 && TREE_CODE (*node) != TYPE_DECL)
39312 warning (OPT_Wattributes, "%qE attribute only applies to functions",
39313 name);
39314 *no_add_attrs = true;
39315 return NULL_TREE;
39317 if (TARGET_64BIT)
39319 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
39320 name);
39321 *no_add_attrs = true;
39322 return NULL_TREE;
39324 if (is_attribute_p ("callee_pop_aggregate_return", name))
39326 tree cst;
39328 cst = TREE_VALUE (args);
39329 if (TREE_CODE (cst) != INTEGER_CST)
39331 warning (OPT_Wattributes,
39332 "%qE attribute requires an integer constant argument",
39333 name);
39334 *no_add_attrs = true;
39336 else if (compare_tree_int (cst, 0) != 0
39337 && compare_tree_int (cst, 1) != 0)
39339 warning (OPT_Wattributes,
39340 "argument to %qE attribute is neither zero, nor one",
39341 name);
39342 *no_add_attrs = true;
39345 return NULL_TREE;
39348 return NULL_TREE;
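/* Illustrative usage (added, hypothetical declaration): on a 32-bit
   target a prototype such as
     struct big f (void) __attribute__ ((callee_pop_aggregate_return (1)));
   is accepted by the handler above, because the argument is an
   INTEGER_CST comparing equal to 1; any other value triggers the
   "neither zero, nor one" warning and the attribute is dropped.  */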
39351 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
39352 struct attribute_spec.handler. */
39353 static tree
39354 ix86_handle_abi_attribute (tree *node, tree name,
39355 tree args ATTRIBUTE_UNUSED,
39356 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
39358 if (TREE_CODE (*node) != FUNCTION_TYPE
39359 && TREE_CODE (*node) != METHOD_TYPE
39360 && TREE_CODE (*node) != FIELD_DECL
39361 && TREE_CODE (*node) != TYPE_DECL)
39363 warning (OPT_Wattributes, "%qE attribute only applies to functions",
39364 name);
39365 *no_add_attrs = true;
39366 return NULL_TREE;
39369 /* Can combine regparm with all attributes but fastcall. */
39370 if (is_attribute_p ("ms_abi", name))
39372 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
39374 error ("ms_abi and sysv_abi attributes are not compatible");
39377 return NULL_TREE;
39379 else if (is_attribute_p ("sysv_abi", name))
39381 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
39383 error ("ms_abi and sysv_abi attributes are not compatible");
39386 return NULL_TREE;
39389 return NULL_TREE;
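/* Illustrative usage (added, hypothetical declarations):
     void f (int) __attribute__ ((ms_abi));
     void g (int) __attribute__ ((sysv_abi));
   are each accepted; combining ms_abi with a type that already carries
   sysv_abi (or vice versa) reaches the error above.  */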
39392 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
39393 struct attribute_spec.handler. */
39394 static tree
39395 ix86_handle_struct_attribute (tree *node, tree name,
39396 tree args ATTRIBUTE_UNUSED,
39397 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
39399 tree *type = NULL;
39400 if (DECL_P (*node))
39402 if (TREE_CODE (*node) == TYPE_DECL)
39403 type = &TREE_TYPE (*node);
39405 else
39406 type = node;
39408 if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
39410 warning (OPT_Wattributes, "%qE attribute ignored",
39411 name);
39412 *no_add_attrs = true;
39415 else if ((is_attribute_p ("ms_struct", name)
39416 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
39417 || ((is_attribute_p ("gcc_struct", name)
39418 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
39420 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
39421 name);
39422 *no_add_attrs = true;
39425 return NULL_TREE;
39428 static tree
39429 ix86_handle_fndecl_attribute (tree *node, tree name,
39430 tree args ATTRIBUTE_UNUSED,
39431 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
39433 if (TREE_CODE (*node) != FUNCTION_DECL)
39435 warning (OPT_Wattributes, "%qE attribute only applies to functions",
39436 name);
39437 *no_add_attrs = true;
39439 return NULL_TREE;
39442 static bool
39443 ix86_ms_bitfield_layout_p (const_tree record_type)
39445 return ((TARGET_MS_BITFIELD_LAYOUT
39446 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
39447 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
39450 /* Returns an expression indicating where the this parameter is
39451 located on entry to the FUNCTION. */
39453 static rtx
39454 x86_this_parameter (tree function)
39456 tree type = TREE_TYPE (function);
39457 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
39458 int nregs;
39460 if (TARGET_64BIT)
39462 const int *parm_regs;
39464 if (ix86_function_type_abi (type) == MS_ABI)
39465 parm_regs = x86_64_ms_abi_int_parameter_registers;
39466 else
39467 parm_regs = x86_64_int_parameter_registers;
39468 return gen_rtx_REG (Pmode, parm_regs[aggr]);
39471 nregs = ix86_function_regparm (type, function);
39473 if (nregs > 0 && !stdarg_p (type))
39475 int regno;
39476 unsigned int ccvt = ix86_get_callcvt (type);
39478 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
39479 regno = aggr ? DX_REG : CX_REG;
39480 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
39482 regno = CX_REG;
39483 if (aggr)
39484 return gen_rtx_MEM (SImode,
39485 plus_constant (Pmode, stack_pointer_rtx, 4));
39487 else
39489 regno = AX_REG;
39490 if (aggr)
39492 regno = DX_REG;
39493 if (nregs == 1)
39494 return gen_rtx_MEM (SImode,
39495 plus_constant (Pmode,
39496 stack_pointer_rtx, 4));
39499 return gen_rtx_REG (SImode, regno);
39502 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
39503 aggr ? 8 : 4));
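/* Illustrative examples (added): for a 32-bit fastcall method that does
   not return an aggregate, the code above yields
   gen_rtx_REG (SImode, CX_REG), i.e. "this" arrives in %ecx; if the
   method returns an aggregate in memory, "this" is in %edx instead.
   With no register parameters at all, "this" is read from the stack at
   sp + 4 (sp + 8 for aggregate returns).  */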
39506 /* Determine whether x86_output_mi_thunk can succeed. */
39508 static bool
39509 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
39510 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
39511 HOST_WIDE_INT vcall_offset, const_tree function)
39513 /* 64-bit can handle anything. */
39514 if (TARGET_64BIT)
39515 return true;
39517 /* For 32-bit, everything's fine if we have one free register. */
39518 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
39519 return true;
39521 /* Need a free register for vcall_offset. */
39522 if (vcall_offset)
39523 return false;
39525 /* Need a free register for GOT references. */
39526 if (flag_pic && !targetm.binds_local_p (function))
39527 return false;
39529 /* Otherwise ok. */
39530 return true;
39533 /* Output the assembler code for a thunk function. THUNK_DECL is the
39534 declaration for the thunk function itself, FUNCTION is the decl for
39535 the target function. DELTA is an immediate constant offset to be
39536 added to THIS. If VCALL_OFFSET is nonzero, the word at
39537 *(*this + vcall_offset) should be added to THIS. */
39539 static void
39540 x86_output_mi_thunk (FILE *file,
39541 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
39542 HOST_WIDE_INT vcall_offset, tree function)
39544 rtx this_param = x86_this_parameter (function);
39545 rtx this_reg, tmp, fnaddr;
39546 unsigned int tmp_regno;
39548 if (TARGET_64BIT)
39549 tmp_regno = R10_REG;
39550 else
39552 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
39553 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
39554 tmp_regno = AX_REG;
39555 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
39556 tmp_regno = DX_REG;
39557 else
39558 tmp_regno = CX_REG;
39561 emit_note (NOTE_INSN_PROLOGUE_END);
39563 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
39564 pull it in now and let DELTA benefit. */
39565 if (REG_P (this_param))
39566 this_reg = this_param;
39567 else if (vcall_offset)
39569 /* Put the this parameter into %eax. */
39570 this_reg = gen_rtx_REG (Pmode, AX_REG);
39571 emit_move_insn (this_reg, this_param);
39573 else
39574 this_reg = NULL_RTX;
39576 /* Adjust the this parameter by a fixed constant. */
39577 if (delta)
39579 rtx delta_rtx = GEN_INT (delta);
39580 rtx delta_dst = this_reg ? this_reg : this_param;
39582 if (TARGET_64BIT)
39584 if (!x86_64_general_operand (delta_rtx, Pmode))
39586 tmp = gen_rtx_REG (Pmode, tmp_regno);
39587 emit_move_insn (tmp, delta_rtx);
39588 delta_rtx = tmp;
39592 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
39595 /* Adjust the this parameter by a value stored in the vtable. */
39596 if (vcall_offset)
39598 rtx vcall_addr, vcall_mem, this_mem;
39600 tmp = gen_rtx_REG (Pmode, tmp_regno);
39602 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
39603 if (Pmode != ptr_mode)
39604 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
39605 emit_move_insn (tmp, this_mem);
39607 /* Adjust the this parameter. */
39608 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
39609 if (TARGET_64BIT
39610 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
39612 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
39613 emit_move_insn (tmp2, GEN_INT (vcall_offset));
39614 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
39617 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
39618 if (Pmode != ptr_mode)
39619 emit_insn (gen_addsi_1_zext (this_reg,
39620 gen_rtx_REG (ptr_mode,
39621 REGNO (this_reg)),
39622 vcall_mem));
39623 else
39624 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
39627 /* If necessary, drop THIS back to its stack slot. */
39628 if (this_reg && this_reg != this_param)
39629 emit_move_insn (this_param, this_reg);
39631 fnaddr = XEXP (DECL_RTL (function), 0);
39632 if (TARGET_64BIT)
39634 if (!flag_pic || targetm.binds_local_p (function)
39635 || TARGET_PECOFF)
39637 else
39639 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
39640 tmp = gen_rtx_CONST (Pmode, tmp);
39641 fnaddr = gen_const_mem (Pmode, tmp);
39644 else
39646 if (!flag_pic || targetm.binds_local_p (function))
39648 #if TARGET_MACHO
39649 else if (TARGET_MACHO)
39651 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
39652 fnaddr = XEXP (fnaddr, 0);
39654 #endif /* TARGET_MACHO */
39655 else
39657 tmp = gen_rtx_REG (Pmode, CX_REG);
39658 output_set_got (tmp, NULL_RTX);
39660 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
39661 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
39662 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
39663 fnaddr = gen_const_mem (Pmode, fnaddr);
39667 /* Our sibling call patterns do not allow memories, because we have no
39668 predicate that can distinguish between frame and non-frame memory.
39669 For our purposes here, we can get away with (ab)using a jump pattern,
39670 because we're going to do no optimization. */
39671 if (MEM_P (fnaddr))
39672 emit_jump_insn (gen_indirect_jump (fnaddr));
39673 else
39675 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
39676 fnaddr = legitimize_pic_address (fnaddr,
39677 gen_rtx_REG (Pmode, tmp_regno));
39679 if (!sibcall_insn_operand (fnaddr, word_mode))
39681 tmp = gen_rtx_REG (word_mode, tmp_regno);
39682 if (GET_MODE (fnaddr) != word_mode)
39683 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
39684 emit_move_insn (tmp, fnaddr);
39685 fnaddr = tmp;
39688 tmp = gen_rtx_MEM (QImode, fnaddr);
39689 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
39690 tmp = emit_call_insn (tmp);
39691 SIBLING_CALL_P (tmp) = 1;
39693 emit_barrier ();
39695 /* Emit just enough of rest_of_compilation to get the insns emitted.
39696 Note that use_thunk calls assemble_start_function et al. */
39697 tmp = get_insns ();
39698 shorten_branches (tmp);
39699 final_start_function (tmp, file, 1);
39700 final (tmp, file, 1);
39701 final_end_function ();
39704 static void
39705 x86_file_start (void)
39707 default_file_start ();
39708 if (TARGET_16BIT)
39709 fputs ("\t.code16gcc\n", asm_out_file);
39710 #if TARGET_MACHO
39711 darwin_file_start ();
39712 #endif
39713 if (X86_FILE_START_VERSION_DIRECTIVE)
39714 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
39715 if (X86_FILE_START_FLTUSED)
39716 fputs ("\t.global\t__fltused\n", asm_out_file);
39717 if (ix86_asm_dialect == ASM_INTEL)
39718 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
39722 x86_field_alignment (tree field, int computed)
39724 enum machine_mode mode;
39725 tree type = TREE_TYPE (field);
39727 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
39728 return computed;
39729 mode = TYPE_MODE (strip_array_types (type));
39730 if (mode == DFmode || mode == DCmode
39731 || GET_MODE_CLASS (mode) == MODE_INT
39732 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
39733 return MIN (32, computed);
39734 return computed;
39737 /* Output assembler code to FILE to increment profiler label # LABELNO
39738 for profiling a function entry. */
39739 void
39740 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
39742 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
39743 : MCOUNT_NAME);
39745 if (TARGET_64BIT)
39747 #ifndef NO_PROFILE_COUNTERS
39748 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
39749 #endif
39751 if (!TARGET_PECOFF && flag_pic)
39752 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
39753 else
39754 fprintf (file, "\tcall\t%s\n", mcount_name);
39756 else if (flag_pic)
39758 #ifndef NO_PROFILE_COUNTERS
39759 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
39760 LPREFIX, labelno);
39761 #endif
39762 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
39764 else
39766 #ifndef NO_PROFILE_COUNTERS
39767 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
39768 LPREFIX, labelno);
39769 #endif
39770 fprintf (file, "\tcall\t%s\n", mcount_name);
39774 /* We don't have exact information about the insn sizes, but we may assume
39775 quite safely that we are informed about all 1 byte insns and memory
39776 address sizes. This is enough to eliminate unnecessary padding in
39777 99% of cases. */
39779 static int
39780 min_insn_size (rtx insn)
39782 int l = 0, len;
39784 if (!INSN_P (insn) || !active_insn_p (insn))
39785 return 0;
39787 /* Discard alignments we've emitted and jump instructions. */
39788 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
39789 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
39790 return 0;
39792 /* Important case - calls are always 5 bytes.
39793 It is common to have many calls in a row. */
39794 if (CALL_P (insn)
39795 && symbolic_reference_mentioned_p (PATTERN (insn))
39796 && !SIBLING_CALL_P (insn))
39797 return 5;
39798 len = get_attr_length (insn);
39799 if (len <= 1)
39800 return 1;
39802 /* For normal instructions we rely on get_attr_length being exact,
39803 with a few exceptions. */
39804 if (!JUMP_P (insn))
39806 enum attr_type type = get_attr_type (insn);
39808 switch (type)
39810 case TYPE_MULTI:
39811 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
39812 || asm_noperands (PATTERN (insn)) >= 0)
39813 return 0;
39814 break;
39815 case TYPE_OTHER:
39816 case TYPE_FCMP:
39817 break;
39818 default:
39819 /* Otherwise trust get_attr_length. */
39820 return len;
39823 l = get_attr_length_address (insn);
39824 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
39825 l = 4;
39827 if (l)
39828 return 1+l;
39829 else
39830 return 2;
39833 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
39835 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
39836 window. */
39838 static void
39839 ix86_avoid_jump_mispredicts (void)
39841 rtx insn, start = get_insns ();
39842 int nbytes = 0, njumps = 0;
39843 int isjump = 0;
39845 /* Look for all minimal intervals of instructions containing 4 jumps.
39846 The intervals are bounded by START and INSN. NBYTES is the total
39847 size of instructions in the interval including INSN and not including
39848 START. When the NBYTES is smaller than 16 bytes, it is possible
39849 that the end of START and INSN ends up in the same 16byte page.
39851 The smallest offset in the page INSN can start is the case where START
39852 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
39853 We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
39855 Don't consider asm goto as jump, while it can contain a jump, it doesn't
39856 have to, control transfer to label(s) can be performed through other
39857 means, and also we estimate minimum length of all asm stmts as 0. */
39858 for (insn = start; insn; insn = NEXT_INSN (insn))
39860 int min_size;
39862 if (LABEL_P (insn))
39864 int align = label_to_alignment (insn);
39865 int max_skip = label_to_max_skip (insn);
39867 if (max_skip > 15)
39868 max_skip = 15;
39869 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
39870 already in the current 16 byte page, because otherwise
39871 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
39872 bytes to reach 16 byte boundary. */
39873 if (align <= 0
39874 || (align <= 3 && max_skip != (1 << align) - 1))
39875 max_skip = 0;
39876 if (dump_file)
39877 fprintf (dump_file, "Label %i with max_skip %i\n",
39878 INSN_UID (insn), max_skip);
39879 if (max_skip)
39881 while (nbytes + max_skip >= 16)
39883 start = NEXT_INSN (start);
39884 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
39885 || CALL_P (start))
39886 njumps--, isjump = 1;
39887 else
39888 isjump = 0;
39889 nbytes -= min_insn_size (start);
39892 continue;
39895 min_size = min_insn_size (insn);
39896 nbytes += min_size;
39897 if (dump_file)
39898 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
39899 INSN_UID (insn), min_size);
39900 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
39901 || CALL_P (insn))
39902 njumps++;
39903 else
39904 continue;
39906 while (njumps > 3)
39908 start = NEXT_INSN (start);
39909 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
39910 || CALL_P (start))
39911 njumps--, isjump = 1;
39912 else
39913 isjump = 0;
39914 nbytes -= min_insn_size (start);
39916 gcc_assert (njumps >= 0);
39917 if (dump_file)
39918 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
39919 INSN_UID (start), INSN_UID (insn), nbytes);
39921 if (njumps == 3 && isjump && nbytes < 16)
39923 int padsize = 15 - nbytes + min_insn_size (insn);
39925 if (dump_file)
39926 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
39927 INSN_UID (insn), padsize);
39928 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
39932 #endif
39934 /* AMD Athlon works faster
39935 when RET is not the destination of a conditional jump or directly preceded
39936 by another jump instruction. We avoid the penalty by inserting a NOP just
39937 before the RET instruction in such cases. */
39938 static void
39939 ix86_pad_returns (void)
39941 edge e;
39942 edge_iterator ei;
39944 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39946 basic_block bb = e->src;
39947 rtx ret = BB_END (bb);
39948 rtx prev;
39949 bool replace = false;
39951 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
39952 || optimize_bb_for_size_p (bb))
39953 continue;
39954 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
39955 if (active_insn_p (prev) || LABEL_P (prev))
39956 break;
39957 if (prev && LABEL_P (prev))
39959 edge e;
39960 edge_iterator ei;
39962 FOR_EACH_EDGE (e, ei, bb->preds)
39963 if (EDGE_FREQUENCY (e) && e->src->index >= 0
39964 && !(e->flags & EDGE_FALLTHRU))
39966 replace = true;
39967 break;
39970 if (!replace)
39972 prev = prev_active_insn (ret);
39973 if (prev
39974 && ((JUMP_P (prev) && any_condjump_p (prev))
39975 || CALL_P (prev)))
39976 replace = true;
39977 /* Empty functions get branch mispredict even when
39978 the jump destination is not visible to us. */
39979 if (!prev && !optimize_function_for_size_p (cfun))
39980 replace = true;
39982 if (replace)
39984 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
39985 delete_insn (ret);
39990 /* Count the minimum number of instructions in BB. Return 4 if the
39991 number of instructions >= 4. */
39993 static int
39994 ix86_count_insn_bb (basic_block bb)
39996 rtx insn;
39997 int insn_count = 0;
39999 /* Count number of instructions in this block. Return 4 if the number
40000 of instructions >= 4. */
40001 FOR_BB_INSNS (bb, insn)
40003 /* This only happens in exit blocks. */
40004 if (JUMP_P (insn)
40005 && ANY_RETURN_P (PATTERN (insn)))
40006 break;
40008 if (NONDEBUG_INSN_P (insn)
40009 && GET_CODE (PATTERN (insn)) != USE
40010 && GET_CODE (PATTERN (insn)) != CLOBBER)
40012 insn_count++;
40013 if (insn_count >= 4)
40014 return insn_count;
40018 return insn_count;
40022 /* Count the minimum number of instructions in code path in BB.
40023 Return 4 if the number of instructions >= 4. */
40025 static int
40026 ix86_count_insn (basic_block bb)
40028 edge e;
40029 edge_iterator ei;
40030 int min_prev_count;
40032 /* Only bother counting instructions along paths with no
40033 more than 2 basic blocks between entry and exit. Given
40034 that BB has an edge to exit, determine if a predecessor
40035 of BB has an edge from entry. If so, compute the number
40036 of instructions in the predecessor block. If there
40037 happen to be multiple such blocks, compute the minimum. */
40038 min_prev_count = 4;
40039 FOR_EACH_EDGE (e, ei, bb->preds)
40041 edge prev_e;
40042 edge_iterator prev_ei;
40044 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
40046 min_prev_count = 0;
40047 break;
40049 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
40051 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
40053 int count = ix86_count_insn_bb (e->src);
40054 if (count < min_prev_count)
40055 min_prev_count = count;
40056 break;
40061 if (min_prev_count < 4)
40062 min_prev_count += ix86_count_insn_bb (bb);
40064 return min_prev_count;
40067 /* Pad short function to 4 instructions. */
40069 static void
40070 ix86_pad_short_function (void)
40072 edge e;
40073 edge_iterator ei;
40075 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
40077 rtx ret = BB_END (e->src);
40078 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
40080 int insn_count = ix86_count_insn (e->src);
40082 /* Pad short function. */
40083 if (insn_count < 4)
40085 rtx insn = ret;
40087 /* Find epilogue. */
40088 while (insn
40089 && (!NOTE_P (insn)
40090 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
40091 insn = PREV_INSN (insn);
40093 if (!insn)
40094 insn = ret;
40096 /* Two NOPs count as one instruction. */
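/* For example, a function with only two instructions is short by two, so
   four NOPs are emitted just before the epilogue.  */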
40097 insn_count = 2 * (4 - insn_count);
40098 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
40104 /* Fix up a Windows system unwinder issue. If an EH region falls through into
40105 the epilogue, the Windows system unwinder will apply epilogue logic and
40106 produce incorrect offsets. This can be avoided by adding a nop between
40107 the last insn that can throw and the first insn of the epilogue. */
40109 static void
40110 ix86_seh_fixup_eh_fallthru (void)
40112 edge e;
40113 edge_iterator ei;
40115 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
40117 rtx insn, next;
40119 /* Find the beginning of the epilogue. */
40120 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
40121 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
40122 break;
40123 if (insn == NULL)
40124 continue;
40126 /* We only care about preceding insns that can throw. */
40127 insn = prev_active_insn (insn);
40128 if (insn == NULL || !can_throw_internal (insn))
40129 continue;
40131 /* Do not separate calls from their debug information. */
40132 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
40133 if (NOTE_P (next)
40134 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
40135 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION))
40136 insn = next;
40137 else
40138 break;
40140 emit_insn_after (gen_nops (const1_rtx), insn);
40144 /* Implement machine specific optimizations. We implement padding of returns
40145 for K8 CPUs and a pass to avoid 4 jumps in a single 16 byte window. */
40146 static void
40147 ix86_reorg (void)
40149 /* We are freeing block_for_insn in the toplev to keep compatibility
40150 with old MDEP_REORGS that are not CFG based. Recompute it now. */
40151 compute_bb_for_insn ();
40153 if (TARGET_SEH && current_function_has_exception_handlers ())
40154 ix86_seh_fixup_eh_fallthru ();
40156 if (optimize && optimize_function_for_speed_p (cfun))
40158 if (TARGET_PAD_SHORT_FUNCTION)
40159 ix86_pad_short_function ();
40160 else if (TARGET_PAD_RETURNS)
40161 ix86_pad_returns ();
40162 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
40163 if (TARGET_FOUR_JUMP_LIMIT)
40164 ix86_avoid_jump_mispredicts ();
40165 #endif
40169 /* Return nonzero when QImode register that must be represented via REX prefix
40170 is used. */
40171 bool
40172 x86_extended_QIreg_mentioned_p (rtx insn)
40174 int i;
40175 extract_insn_cached (insn);
40176 for (i = 0; i < recog_data.n_operands; i++)
40177 if (GENERAL_REG_P (recog_data.operand[i])
40178 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
40179 return true;
40180 return false;
40183 /* Return nonzero when P points to register encoded via REX prefix.
40184 Called via for_each_rtx. */
40185 static int
40186 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
40188 unsigned int regno;
40189 if (!REG_P (*p))
40190 return 0;
40191 regno = REGNO (*p);
40192 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
40195 /* Return true when INSN mentions register that must be encoded using REX
40196 prefix. */
40197 bool
40198 x86_extended_reg_mentioned_p (rtx insn)
40200 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
40201 extended_reg_mentioned_1, NULL);
40204 /* If profitable, negate (without causing overflow) integer constant
40205 of mode MODE at location LOC. Return true in this case. */
40206 bool
40207 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
40209 HOST_WIDE_INT val;
40211 if (!CONST_INT_P (*loc))
40212 return false;
40214 switch (mode)
40216 case DImode:
40217 /* DImode x86_64 constants must fit in 32 bits. */
40218 gcc_assert (x86_64_immediate_operand (*loc, mode));
40220 mode = SImode;
40221 break;
40223 case SImode:
40224 case HImode:
40225 case QImode:
40226 break;
40228 default:
40229 gcc_unreachable ();
40232 /* Avoid overflows. */
40233 if (mode_signbit_p (mode, *loc))
40234 return false;
40236 val = INTVAL (*loc);
40238 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
40239 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
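/* For example, `addl $128, %eax' needs a 4-byte immediate, while `subl $-128, %eax'
   fits in an imm8, so 128 is negated; -128 itself is left alone for the same reason.  */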
40240 if ((val < 0 && val != -128)
40241 || val == 128)
40243 *loc = GEN_INT (-val);
40244 return true;
40247 return false;
40250 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
40251 optabs would emit if we didn't have TFmode patterns. */
40253 void
40254 x86_emit_floatuns (rtx operands[2])
40256 rtx neglab, donelab, i0, i1, f0, in, out;
40257 enum machine_mode mode, inmode;
40259 inmode = GET_MODE (operands[1]);
40260 gcc_assert (inmode == SImode || inmode == DImode);
40262 out = operands[0];
40263 in = force_reg (inmode, operands[1]);
40264 mode = GET_MODE (out);
40265 neglab = gen_label_rtx ();
40266 donelab = gen_label_rtx ();
40267 f0 = gen_reg_rtx (mode);
40269 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
40271 expand_float (out, in, 0);
40273 emit_jump_insn (gen_jump (donelab));
40274 emit_barrier ();
40276 emit_label (neglab);
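/* The input has its top bit set, so a signed conversion would be wrong: halve it,
   folding the discarded low bit back in so rounding still comes out right, convert
   the halved value, and then double the result.  */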
40278 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
40279 1, OPTAB_DIRECT);
40280 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
40281 1, OPTAB_DIRECT);
40282 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
40284 expand_float (f0, i0, 0);
40286 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
40288 emit_label (donelab);
40291 /* AVX512F does support 64-byte integer vector operations,
40292 thus the longest vector we are faced with is V64QImode. */
40293 #define MAX_VECT_LEN 64
40295 struct expand_vec_perm_d
40297 rtx target, op0, op1;
40298 unsigned char perm[MAX_VECT_LEN];
40299 enum machine_mode vmode;
40300 unsigned char nelt;
40301 bool one_operand_p;
40302 bool testing_p;
40305 static bool canonicalize_perm (struct expand_vec_perm_d *d);
40306 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
40307 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
40309 /* Get a vector mode of the same size as the original but with elements
40310 twice as wide. This is only guaranteed to apply to integral vectors. */
40312 static inline enum machine_mode
40313 get_mode_wider_vector (enum machine_mode o)
40315 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
40316 enum machine_mode n = GET_MODE_WIDER_MODE (o);
40317 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
40318 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
40319 return n;
40322 /* A subroutine of ix86_expand_vector_init_duplicate. Tries to
40323 fill target with val via vec_duplicate. */
40325 static bool
40326 ix86_vector_duplicate_value (enum machine_mode mode, rtx target, rtx val)
40328 bool ok;
40329 rtx insn, dup;
40331 /* First attempt to recognize VAL as-is. */
40332 dup = gen_rtx_VEC_DUPLICATE (mode, val);
40333 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
40334 if (recog_memoized (insn) < 0)
40336 rtx seq;
40337 /* If that fails, force VAL into a register. */
40339 start_sequence ();
40340 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
40341 seq = get_insns ();
40342 end_sequence ();
40343 if (seq)
40344 emit_insn_before (seq, insn);
40346 ok = recog_memoized (insn) >= 0;
40347 gcc_assert (ok);
40349 return true;
40352 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
40353 with all elements equal to VAR. Return true if successful. */
40355 static bool
40356 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
40357 rtx target, rtx val)
40359 bool ok;
40361 switch (mode)
40363 case V2SImode:
40364 case V2SFmode:
40365 if (!mmx_ok)
40366 return false;
40367 /* FALLTHRU */
40369 case V4DFmode:
40370 case V4DImode:
40371 case V8SFmode:
40372 case V8SImode:
40373 case V2DFmode:
40374 case V2DImode:
40375 case V4SFmode:
40376 case V4SImode:
40377 case V16SImode:
40378 case V8DImode:
40379 case V16SFmode:
40380 case V8DFmode:
40381 return ix86_vector_duplicate_value (mode, target, val);
40383 case V4HImode:
40384 if (!mmx_ok)
40385 return false;
40386 if (TARGET_SSE || TARGET_3DNOW_A)
40388 rtx x;
40390 val = gen_lowpart (SImode, val);
40391 x = gen_rtx_TRUNCATE (HImode, val);
40392 x = gen_rtx_VEC_DUPLICATE (mode, x);
40393 emit_insn (gen_rtx_SET (VOIDmode, target, x));
40394 return true;
40396 goto widen;
40398 case V8QImode:
40399 if (!mmx_ok)
40400 return false;
40401 goto widen;
40403 case V8HImode:
40404 if (TARGET_SSE2)
40406 struct expand_vec_perm_d dperm;
40407 rtx tmp1, tmp2;
40409 permute:
40410 memset (&dperm, 0, sizeof (dperm));
40411 dperm.target = target;
40412 dperm.vmode = mode;
40413 dperm.nelt = GET_MODE_NUNITS (mode);
40414 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
40415 dperm.one_operand_p = true;
40417 /* Extend to SImode using a paradoxical SUBREG. */
40418 tmp1 = gen_reg_rtx (SImode);
40419 emit_move_insn (tmp1, gen_lowpart (SImode, val));
40421 /* Insert the SImode value as low element of a V4SImode vector. */
40422 tmp2 = gen_reg_rtx (V4SImode);
40423 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
40424 emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
40426 ok = (expand_vec_perm_1 (&dperm)
40427 || expand_vec_perm_broadcast_1 (&dperm));
40428 gcc_assert (ok);
40429 return ok;
40431 goto widen;
40433 case V16QImode:
40434 if (TARGET_SSE2)
40435 goto permute;
40436 goto widen;
40438 widen:
40439 /* Replicate the value once into the next wider mode and recurse. */
40441 enum machine_mode smode, wsmode, wvmode;
40442 rtx x;
40444 smode = GET_MODE_INNER (mode);
40445 wvmode = get_mode_wider_vector (mode);
40446 wsmode = GET_MODE_INNER (wvmode);
40448 val = convert_modes (wsmode, smode, val, true);
40449 x = expand_simple_binop (wsmode, ASHIFT, val,
40450 GEN_INT (GET_MODE_BITSIZE (smode)),
40451 NULL_RTX, 1, OPTAB_LIB_WIDEN);
40452 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
40454 x = gen_reg_rtx (wvmode);
40455 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
40456 gcc_assert (ok);
40457 emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
40458 return ok;
40461 case V16HImode:
40462 case V32QImode:
40464 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
40465 rtx x = gen_reg_rtx (hvmode);
40467 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
40468 gcc_assert (ok);
40470 x = gen_rtx_VEC_CONCAT (mode, x, x);
40471 emit_insn (gen_rtx_SET (VOIDmode, target, x));
40473 return true;
40475 default:
40476 return false;
40480 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
40481 whose ONE_VAR element is VAR, and other elements are zero. Return true
40482 if successful. */
40484 static bool
40485 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
40486 rtx target, rtx var, int one_var)
40488 enum machine_mode vsimode;
40489 rtx new_target;
40490 rtx x, tmp;
40491 bool use_vector_set = false;
40493 switch (mode)
40495 case V2DImode:
40496 /* For SSE4.1, we normally use vector set. But if the second
40497 element is zero and inter-unit moves are OK, we use movq
40498 instead. */
40499 use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
40500 && !(TARGET_INTER_UNIT_MOVES_TO_VEC
40501 && one_var == 0));
40502 break;
40503 case V16QImode:
40504 case V4SImode:
40505 case V4SFmode:
40506 use_vector_set = TARGET_SSE4_1;
40507 break;
40508 case V8HImode:
40509 use_vector_set = TARGET_SSE2;
40510 break;
40511 case V4HImode:
40512 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
40513 break;
40514 case V32QImode:
40515 case V16HImode:
40516 case V8SImode:
40517 case V8SFmode:
40518 case V4DFmode:
40519 use_vector_set = TARGET_AVX;
40520 break;
40521 case V4DImode:
40522 /* Use ix86_expand_vector_set in 64bit mode only. */
40523 use_vector_set = TARGET_AVX && TARGET_64BIT;
40524 break;
40525 default:
40526 break;
40529 if (use_vector_set)
40531 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
40532 var = force_reg (GET_MODE_INNER (mode), var);
40533 ix86_expand_vector_set (mmx_ok, target, var, one_var);
40534 return true;
40537 switch (mode)
40539 case V2SFmode:
40540 case V2SImode:
40541 if (!mmx_ok)
40542 return false;
40543 /* FALLTHRU */
40545 case V2DFmode:
40546 case V2DImode:
40547 if (one_var != 0)
40548 return false;
40549 var = force_reg (GET_MODE_INNER (mode), var);
40550 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
40551 emit_insn (gen_rtx_SET (VOIDmode, target, x));
40552 return true;
40554 case V4SFmode:
40555 case V4SImode:
40556 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
40557 new_target = gen_reg_rtx (mode);
40558 else
40559 new_target = target;
40560 var = force_reg (GET_MODE_INNER (mode), var);
40561 x = gen_rtx_VEC_DUPLICATE (mode, var);
40562 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
40563 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
40564 if (one_var != 0)
40566 /* We need to shuffle the value to the correct position, so
40567 create a new pseudo to store the intermediate result. */
40569 /* With SSE2, we can use the integer shuffle insns. */
40570 if (mode != V4SFmode && TARGET_SSE2)
40572 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
40573 const1_rtx,
40574 GEN_INT (one_var == 1 ? 0 : 1),
40575 GEN_INT (one_var == 2 ? 0 : 1),
40576 GEN_INT (one_var == 3 ? 0 : 1)));
40577 if (target != new_target)
40578 emit_move_insn (target, new_target);
40579 return true;
40582 /* Otherwise convert the intermediate result to V4SFmode and
40583 use the SSE1 shuffle instructions. */
40584 if (mode != V4SFmode)
40586 tmp = gen_reg_rtx (V4SFmode);
40587 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
40589 else
40590 tmp = new_target;
40592 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
40593 const1_rtx,
40594 GEN_INT (one_var == 1 ? 0 : 1),
40595 GEN_INT (one_var == 2 ? 0+4 : 1+4),
40596 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
40598 if (mode != V4SFmode)
40599 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
40600 else if (tmp != target)
40601 emit_move_insn (target, tmp);
40603 else if (target != new_target)
40604 emit_move_insn (target, new_target);
40605 return true;
40607 case V8HImode:
40608 case V16QImode:
40609 vsimode = V4SImode;
40610 goto widen;
40611 case V4HImode:
40612 case V8QImode:
40613 if (!mmx_ok)
40614 return false;
40615 vsimode = V2SImode;
40616 goto widen;
40617 widen:
40618 if (one_var != 0)
40619 return false;
40621 /* Zero extend the variable element to SImode and recurse. */
40622 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
40624 x = gen_reg_rtx (vsimode);
40625 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
40626 var, one_var))
40627 gcc_unreachable ();
40629 emit_move_insn (target, gen_lowpart (mode, x));
40630 return true;
40632 default:
40633 return false;
40637 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
40638 consisting of the values in VALS. It is known that all elements
40639 except ONE_VAR are constants. Return true if successful. */
40641 static bool
40642 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
40643 rtx target, rtx vals, int one_var)
40645 rtx var = XVECEXP (vals, 0, one_var);
40646 enum machine_mode wmode;
40647 rtx const_vec, x;
40649 const_vec = copy_rtx (vals);
40650 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
40651 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
40653 switch (mode)
40655 case V2DFmode:
40656 case V2DImode:
40657 case V2SFmode:
40658 case V2SImode:
40659 /* For the two element vectors, it's just as easy to use
40660 the general case. */
40661 return false;
40663 case V4DImode:
40664 /* Use ix86_expand_vector_set in 64bit mode only. */
40665 if (!TARGET_64BIT)
40666 return false;
40667 case V4DFmode:
40668 case V8SFmode:
40669 case V8SImode:
40670 case V16HImode:
40671 case V32QImode:
40672 case V4SFmode:
40673 case V4SImode:
40674 case V8HImode:
40675 case V4HImode:
40676 break;
40678 case V16QImode:
40679 if (TARGET_SSE4_1)
40680 break;
40681 wmode = V8HImode;
40682 goto widen;
40683 case V8QImode:
40684 wmode = V4HImode;
40685 goto widen;
40686 widen:
40687 /* There's no way to set one QImode entry easily. Combine
40688 the variable value with its adjacent constant value, and
40689 promote to an HImode set. */
40690 x = XVECEXP (vals, 0, one_var ^ 1);
40691 if (one_var & 1)
40693 var = convert_modes (HImode, QImode, var, true);
40694 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
40695 NULL_RTX, 1, OPTAB_LIB_WIDEN);
40696 x = GEN_INT (INTVAL (x) & 0xff);
40698 else
40700 var = convert_modes (HImode, QImode, var, true);
40701 x = gen_int_mode (INTVAL (x) << 8, HImode);
40703 if (x != const0_rtx)
40704 var = expand_simple_binop (HImode, IOR, var, x, var,
40705 1, OPTAB_LIB_WIDEN);
40707 x = gen_reg_rtx (wmode);
40708 emit_move_insn (x, gen_lowpart (wmode, const_vec));
40709 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
40711 emit_move_insn (target, gen_lowpart (mode, x));
40712 return true;
40714 default:
40715 return false;
40718 emit_move_insn (target, const_vec);
40719 ix86_expand_vector_set (mmx_ok, target, var, one_var);
40720 return true;
40723 /* A subroutine of ix86_expand_vector_init_general. Use vector
40724 concatenate to handle the most general case: all values variable,
40725 and none identical. */
40727 static void
40728 ix86_expand_vector_init_concat (enum machine_mode mode,
40729 rtx target, rtx *ops, int n)
40731 enum machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
40732 rtx first[16], second[8], third[4];
40733 rtvec v;
40734 int i, j;
40736 switch (n)
40738 case 2:
40739 switch (mode)
40741 case V16SImode:
40742 cmode = V8SImode;
40743 break;
40744 case V16SFmode:
40745 cmode = V8SFmode;
40746 break;
40747 case V8DImode:
40748 cmode = V4DImode;
40749 break;
40750 case V8DFmode:
40751 cmode = V4DFmode;
40752 break;
40753 case V8SImode:
40754 cmode = V4SImode;
40755 break;
40756 case V8SFmode:
40757 cmode = V4SFmode;
40758 break;
40759 case V4DImode:
40760 cmode = V2DImode;
40761 break;
40762 case V4DFmode:
40763 cmode = V2DFmode;
40764 break;
40765 case V4SImode:
40766 cmode = V2SImode;
40767 break;
40768 case V4SFmode:
40769 cmode = V2SFmode;
40770 break;
40771 case V2DImode:
40772 cmode = DImode;
40773 break;
40774 case V2SImode:
40775 cmode = SImode;
40776 break;
40777 case V2DFmode:
40778 cmode = DFmode;
40779 break;
40780 case V2SFmode:
40781 cmode = SFmode;
40782 break;
40783 default:
40784 gcc_unreachable ();
40787 if (!register_operand (ops[1], cmode))
40788 ops[1] = force_reg (cmode, ops[1]);
40789 if (!register_operand (ops[0], cmode))
40790 ops[0] = force_reg (cmode, ops[0]);
40791 emit_insn (gen_rtx_SET (VOIDmode, target,
40792 gen_rtx_VEC_CONCAT (mode, ops[0],
40793 ops[1])));
40794 break;
40796 case 4:
40797 switch (mode)
40799 case V4DImode:
40800 cmode = V2DImode;
40801 break;
40802 case V4DFmode:
40803 cmode = V2DFmode;
40804 break;
40805 case V4SImode:
40806 cmode = V2SImode;
40807 break;
40808 case V4SFmode:
40809 cmode = V2SFmode;
40810 break;
40811 default:
40812 gcc_unreachable ();
40814 goto half;
40816 case 8:
40817 switch (mode)
40819 case V8DImode:
40820 cmode = V2DImode;
40821 hmode = V4DImode;
40822 break;
40823 case V8DFmode:
40824 cmode = V2DFmode;
40825 hmode = V4DFmode;
40826 break;
40827 case V8SImode:
40828 cmode = V2SImode;
40829 hmode = V4SImode;
40830 break;
40831 case V8SFmode:
40832 cmode = V2SFmode;
40833 hmode = V4SFmode;
40834 break;
40835 default:
40836 gcc_unreachable ();
40838 goto half;
40840 case 16:
40841 switch (mode)
40843 case V16SImode:
40844 cmode = V2SImode;
40845 hmode = V4SImode;
40846 gmode = V8SImode;
40847 break;
40848 case V16SFmode:
40849 cmode = V2SFmode;
40850 hmode = V4SFmode;
40851 gmode = V8SFmode;
40852 break;
40853 default:
40854 gcc_unreachable ();
40856 goto half;
40858 half:
40859 /* FIXME: We process inputs backward to help RA. PR 36222. */
40860 i = n - 1;
40861 j = (n >> 1) - 1;
40862 for (; i > 0; i -= 2, j--)
40864 first[j] = gen_reg_rtx (cmode);
40865 v = gen_rtvec (2, ops[i - 1], ops[i]);
40866 ix86_expand_vector_init (false, first[j],
40867 gen_rtx_PARALLEL (cmode, v));
40870 n >>= 1;
40871 if (n > 4)
40873 gcc_assert (hmode != VOIDmode);
40874 gcc_assert (gmode != VOIDmode);
40875 for (i = j = 0; i < n; i += 2, j++)
40877 second[j] = gen_reg_rtx (hmode);
40878 ix86_expand_vector_init_concat (hmode, second [j],
40879 &first [i], 2);
40881 n >>= 1;
40882 for (i = j = 0; i < n; i += 2, j++)
40884 third[j] = gen_reg_rtx (gmode);
40885 ix86_expand_vector_init_concat (gmode, third[j],
40886 &second[i], 2);
40888 n >>= 1;
40889 ix86_expand_vector_init_concat (mode, target, third, n);
40891 else if (n > 2)
40893 gcc_assert (hmode != VOIDmode);
40894 for (i = j = 0; i < n; i += 2, j++)
40896 second[j] = gen_reg_rtx (hmode);
40897 ix86_expand_vector_init_concat (hmode, second [j],
40898 &first [i], 2);
40900 n >>= 1;
40901 ix86_expand_vector_init_concat (mode, target, second, n);
40903 else
40904 ix86_expand_vector_init_concat (mode, target, first, n);
40905 break;
40907 default:
40908 gcc_unreachable ();
40912 /* A subroutine of ix86_expand_vector_init_general. Use vector
40913 interleave to handle the most general case: all values variable,
40914 and none identical. */
40916 static void
40917 ix86_expand_vector_init_interleave (enum machine_mode mode,
40918 rtx target, rtx *ops, int n)
40920 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
40921 int i, j;
40922 rtx op0, op1;
40923 rtx (*gen_load_even) (rtx, rtx, rtx);
40924 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
40925 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
40927 switch (mode)
40929 case V8HImode:
40930 gen_load_even = gen_vec_setv8hi;
40931 gen_interleave_first_low = gen_vec_interleave_lowv4si;
40932 gen_interleave_second_low = gen_vec_interleave_lowv2di;
40933 inner_mode = HImode;
40934 first_imode = V4SImode;
40935 second_imode = V2DImode;
40936 third_imode = VOIDmode;
40937 break;
40938 case V16QImode:
40939 gen_load_even = gen_vec_setv16qi;
40940 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
40941 gen_interleave_second_low = gen_vec_interleave_lowv4si;
40942 inner_mode = QImode;
40943 first_imode = V8HImode;
40944 second_imode = V4SImode;
40945 third_imode = V2DImode;
40946 break;
40947 default:
40948 gcc_unreachable ();
40951 for (i = 0; i < n; i++)
40953 /* Extend the odd element to SImode using a paradoxical SUBREG. */
40954 op0 = gen_reg_rtx (SImode);
40955 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
40957 /* Insert the SImode value as low element of V4SImode vector. */
40958 op1 = gen_reg_rtx (V4SImode);
40959 op0 = gen_rtx_VEC_MERGE (V4SImode,
40960 gen_rtx_VEC_DUPLICATE (V4SImode,
40961 op0),
40962 CONST0_RTX (V4SImode),
40963 const1_rtx);
40964 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
40966 /* Cast the V4SImode vector back to a vector in the original mode. */
40967 op0 = gen_reg_rtx (mode);
40968 emit_move_insn (op0, gen_lowpart (mode, op1));
40970 /* Load even elements into the second position. */
40971 emit_insn (gen_load_even (op0,
40972 force_reg (inner_mode,
40973 ops [i + i + 1]),
40974 const1_rtx));
40976 /* Cast vector to FIRST_IMODE vector. */
40977 ops[i] = gen_reg_rtx (first_imode);
40978 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
40981 /* Interleave low FIRST_IMODE vectors. */
40982 for (i = j = 0; i < n; i += 2, j++)
40984 op0 = gen_reg_rtx (first_imode);
40985 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
40987 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
40988 ops[j] = gen_reg_rtx (second_imode);
40989 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
40992 /* Interleave low SECOND_IMODE vectors. */
40993 switch (second_imode)
40995 case V4SImode:
40996 for (i = j = 0; i < n / 2; i += 2, j++)
40998 op0 = gen_reg_rtx (second_imode);
40999 emit_insn (gen_interleave_second_low (op0, ops[i],
41000 ops[i + 1]));
41002 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
41003 vector. */
41004 ops[j] = gen_reg_rtx (third_imode);
41005 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
41007 second_imode = V2DImode;
41008 gen_interleave_second_low = gen_vec_interleave_lowv2di;
41009 /* FALLTHRU */
41011 case V2DImode:
41012 op0 = gen_reg_rtx (second_imode);
41013 emit_insn (gen_interleave_second_low (op0, ops[0],
41014 ops[1]));
41016 /* Cast the SECOND_IMODE vector back to a vector in the original
41017 mode. */
41018 emit_insn (gen_rtx_SET (VOIDmode, target,
41019 gen_lowpart (mode, op0)));
41020 break;
41022 default:
41023 gcc_unreachable ();
41027 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
41028 all values variable, and none identical. */
41030 static void
41031 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
41032 rtx target, rtx vals)
41034 rtx ops[64], op0, op1;
41035 enum machine_mode half_mode = VOIDmode;
41036 int n, i;
41038 switch (mode)
41040 case V2SFmode:
41041 case V2SImode:
41042 if (!mmx_ok && !TARGET_SSE)
41043 break;
41044 /* FALLTHRU */
41046 case V16SImode:
41047 case V16SFmode:
41048 case V8DFmode:
41049 case V8DImode:
41050 case V8SFmode:
41051 case V8SImode:
41052 case V4DFmode:
41053 case V4DImode:
41054 case V4SFmode:
41055 case V4SImode:
41056 case V2DFmode:
41057 case V2DImode:
41058 n = GET_MODE_NUNITS (mode);
41059 for (i = 0; i < n; i++)
41060 ops[i] = XVECEXP (vals, 0, i);
41061 ix86_expand_vector_init_concat (mode, target, ops, n);
41062 return;
41064 case V32QImode:
41065 half_mode = V16QImode;
41066 goto half;
41068 case V16HImode:
41069 half_mode = V8HImode;
41070 goto half;
41072 half:
41073 n = GET_MODE_NUNITS (mode);
41074 for (i = 0; i < n; i++)
41075 ops[i] = XVECEXP (vals, 0, i);
41076 op0 = gen_reg_rtx (half_mode);
41077 op1 = gen_reg_rtx (half_mode);
41078 ix86_expand_vector_init_interleave (half_mode, op0, ops,
41079 n >> 2);
41080 ix86_expand_vector_init_interleave (half_mode, op1,
41081 &ops [n >> 1], n >> 2);
41082 emit_insn (gen_rtx_SET (VOIDmode, target,
41083 gen_rtx_VEC_CONCAT (mode, op0, op1)));
41084 return;
41086 case V16QImode:
41087 if (!TARGET_SSE4_1)
41088 break;
41089 /* FALLTHRU */
41091 case V8HImode:
41092 if (!TARGET_SSE2)
41093 break;
41095 /* Don't use ix86_expand_vector_init_interleave if we can't
41096 move from GPR to SSE register directly. */
41097 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
41098 break;
41100 n = GET_MODE_NUNITS (mode);
41101 for (i = 0; i < n; i++)
41102 ops[i] = XVECEXP (vals, 0, i);
41103 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
41104 return;
41106 case V4HImode:
41107 case V8QImode:
41108 break;
41110 default:
41111 gcc_unreachable ();
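/* The remaining cases (V4HImode and V8QImode, plus the narrow modes that could
   not use one of the expansions above) pack the elements into word_mode integers
   with shifts and IORs and then assemble the vector from those words.  */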
41115 int i, j, n_elts, n_words, n_elt_per_word;
41116 enum machine_mode inner_mode;
41117 rtx words[4], shift;
41119 inner_mode = GET_MODE_INNER (mode);
41120 n_elts = GET_MODE_NUNITS (mode);
41121 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
41122 n_elt_per_word = n_elts / n_words;
41123 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
41125 for (i = 0; i < n_words; ++i)
41127 rtx word = NULL_RTX;
41129 for (j = 0; j < n_elt_per_word; ++j)
41131 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
41132 elt = convert_modes (word_mode, inner_mode, elt, true);
41134 if (j == 0)
41135 word = elt;
41136 else
41138 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
41139 word, 1, OPTAB_LIB_WIDEN);
41140 word = expand_simple_binop (word_mode, IOR, word, elt,
41141 word, 1, OPTAB_LIB_WIDEN);
41145 words[i] = word;
41148 if (n_words == 1)
41149 emit_move_insn (target, gen_lowpart (mode, words[0]));
41150 else if (n_words == 2)
41152 rtx tmp = gen_reg_rtx (mode);
41153 emit_clobber (tmp);
41154 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
41155 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
41156 emit_move_insn (target, tmp);
41158 else if (n_words == 4)
41160 rtx tmp = gen_reg_rtx (V4SImode);
41161 gcc_assert (word_mode == SImode);
41162 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
41163 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
41164 emit_move_insn (target, gen_lowpart (mode, tmp));
41166 else
41167 gcc_unreachable ();
41171 /* Initialize vector TARGET via VALS. Suppress the use of MMX
41172 instructions unless MMX_OK is true. */
41174 void
41175 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
41177 enum machine_mode mode = GET_MODE (target);
41178 enum machine_mode inner_mode = GET_MODE_INNER (mode);
41179 int n_elts = GET_MODE_NUNITS (mode);
41180 int n_var = 0, one_var = -1;
41181 bool all_same = true, all_const_zero = true;
41182 int i;
41183 rtx x;
41185 for (i = 0; i < n_elts; ++i)
41187 x = XVECEXP (vals, 0, i);
41188 if (!(CONST_INT_P (x)
41189 || GET_CODE (x) == CONST_DOUBLE
41190 || GET_CODE (x) == CONST_FIXED))
41191 n_var++, one_var = i;
41192 else if (x != CONST0_RTX (inner_mode))
41193 all_const_zero = false;
41194 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
41195 all_same = false;
41198 /* Constants are best loaded from the constant pool. */
41199 if (n_var == 0)
41201 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
41202 return;
41205 /* If all values are identical, broadcast the value. */
41206 if (all_same
41207 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
41208 XVECEXP (vals, 0, 0)))
41209 return;
41211 /* Values where only one field is non-constant are best loaded from
41212 the pool and overwritten via move later. */
41213 if (n_var == 1)
41215 if (all_const_zero
41216 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
41217 XVECEXP (vals, 0, one_var),
41218 one_var))
41219 return;
41221 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
41222 return;
41225 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
41228 void
41229 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
41231 enum machine_mode mode = GET_MODE (target);
41232 enum machine_mode inner_mode = GET_MODE_INNER (mode);
41233 enum machine_mode half_mode;
41234 bool use_vec_merge = false;
41235 rtx tmp;
41236 static rtx (*gen_extract[6][2]) (rtx, rtx)
41238 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
41239 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
41240 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
41241 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
41242 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
41243 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
41245 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
41247 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
41248 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
41249 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
41250 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
41251 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
41252 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
41254 int i, j, n;
41256 switch (mode)
41258 case V2SFmode:
41259 case V2SImode:
41260 if (mmx_ok)
41262 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
41263 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
41264 if (elt == 0)
41265 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
41266 else
41267 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
41268 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
41269 return;
41271 break;
41273 case V2DImode:
41274 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
41275 if (use_vec_merge)
41276 break;
41278 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
41279 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
41280 if (elt == 0)
41281 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
41282 else
41283 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
41284 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
41285 return;
41287 case V2DFmode:
41289 rtx op0, op1;
41291 /* For the two element vectors, we implement a VEC_CONCAT with
41292 the extraction of the other element. */
41294 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
41295 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
41297 if (elt == 0)
41298 op0 = val, op1 = tmp;
41299 else
41300 op0 = tmp, op1 = val;
41302 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
41303 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
41305 return;
41307 case V4SFmode:
41308 use_vec_merge = TARGET_SSE4_1;
41309 if (use_vec_merge)
41310 break;
41312 switch (elt)
41314 case 0:
41315 use_vec_merge = true;
41316 break;
41318 case 1:
41319 /* tmp = target = A B C D */
41320 tmp = copy_to_reg (target);
41321 /* target = A A B B */
41322 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
41323 /* target = X A B B */
41324 ix86_expand_vector_set (false, target, val, 0);
41325 /* target = A X C D */
41326 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
41327 const1_rtx, const0_rtx,
41328 GEN_INT (2+4), GEN_INT (3+4)));
41329 return;
41331 case 2:
41332 /* tmp = target = A B C D */
41333 tmp = copy_to_reg (target);
41334 /* tmp = X B C D */
41335 ix86_expand_vector_set (false, tmp, val, 0);
41336 /* target = A B X D */
41337 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
41338 const0_rtx, const1_rtx,
41339 GEN_INT (0+4), GEN_INT (3+4)));
41340 return;
41342 case 3:
41343 /* tmp = target = A B C D */
41344 tmp = copy_to_reg (target);
41345 /* tmp = X B C D */
41346 ix86_expand_vector_set (false, tmp, val, 0);
41347 /* target = A B C X */
41348 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
41349 const0_rtx, const1_rtx,
41350 GEN_INT (2+4), GEN_INT (0+4)));
41351 return;
41353 default:
41354 gcc_unreachable ();
41356 break;
41358 case V4SImode:
41359 use_vec_merge = TARGET_SSE4_1;
41360 if (use_vec_merge)
41361 break;
41363 /* Element 0 handled by vec_merge below. */
41364 if (elt == 0)
41366 use_vec_merge = true;
41367 break;
41370 if (TARGET_SSE2)
41372 /* With SSE2, use integer shuffles to swap element 0 and ELT,
41373 store into element 0, then shuffle them back. */
41375 rtx order[4];
41377 order[0] = GEN_INT (elt);
41378 order[1] = const1_rtx;
41379 order[2] = const2_rtx;
41380 order[3] = GEN_INT (3);
41381 order[elt] = const0_rtx;
41383 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
41384 order[1], order[2], order[3]));
41386 ix86_expand_vector_set (false, target, val, 0);
41388 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
41389 order[1], order[2], order[3]));
41391 else
41393 /* For SSE1, we have to reuse the V4SF code. */
41394 rtx t = gen_reg_rtx (V4SFmode);
41395 ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
41396 emit_move_insn (target, gen_lowpart (mode, t));
41398 return;
41400 case V8HImode:
41401 use_vec_merge = TARGET_SSE2;
41402 break;
41403 case V4HImode:
41404 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
41405 break;
41407 case V16QImode:
41408 use_vec_merge = TARGET_SSE4_1;
41409 break;
41411 case V8QImode:
41412 break;
41414 case V32QImode:
41415 half_mode = V16QImode;
41416 j = 0;
41417 n = 16;
41418 goto half;
41420 case V16HImode:
41421 half_mode = V8HImode;
41422 j = 1;
41423 n = 8;
41424 goto half;
41426 case V8SImode:
41427 half_mode = V4SImode;
41428 j = 2;
41429 n = 4;
41430 goto half;
41432 case V4DImode:
41433 half_mode = V2DImode;
41434 j = 3;
41435 n = 2;
41436 goto half;
41438 case V8SFmode:
41439 half_mode = V4SFmode;
41440 j = 4;
41441 n = 4;
41442 goto half;
41444 case V4DFmode:
41445 half_mode = V2DFmode;
41446 j = 5;
41447 n = 2;
41448 goto half;
41450 half:
41451 /* Compute offset. */
41452 i = elt / n;
41453 elt %= n;
41455 gcc_assert (i <= 1);
41457 /* Extract the half. */
41458 tmp = gen_reg_rtx (half_mode);
41459 emit_insn (gen_extract[j][i] (tmp, target));
41461 /* Put val in tmp at elt. */
41462 ix86_expand_vector_set (false, tmp, val, elt);
41464 /* Put it back. */
41465 emit_insn (gen_insert[j][i] (target, target, tmp));
41466 return;
41468 default:
41469 break;
41472 if (use_vec_merge)
41474 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
41475 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
41476 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
41478 else
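/* No suitable vec_merge or shuffle pattern: spill the vector to a stack
   temporary, store VAL into the selected element's slot, and reload the
   whole vector.  */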
41480 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
41482 emit_move_insn (mem, target);
41484 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
41485 emit_move_insn (tmp, val);
41487 emit_move_insn (target, mem);
41491 void
41492 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
41494 enum machine_mode mode = GET_MODE (vec);
41495 enum machine_mode inner_mode = GET_MODE_INNER (mode);
41496 bool use_vec_extr = false;
41497 rtx tmp;
41499 switch (mode)
41501 case V2SImode:
41502 case V2SFmode:
41503 if (!mmx_ok)
41504 break;
41505 /* FALLTHRU */
41507 case V2DFmode:
41508 case V2DImode:
41509 use_vec_extr = true;
41510 break;
41512 case V4SFmode:
41513 use_vec_extr = TARGET_SSE4_1;
41514 if (use_vec_extr)
41515 break;
41517 switch (elt)
41519 case 0:
41520 tmp = vec;
41521 break;
41523 case 1:
41524 case 3:
41525 tmp = gen_reg_rtx (mode);
41526 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
41527 GEN_INT (elt), GEN_INT (elt),
41528 GEN_INT (elt+4), GEN_INT (elt+4)));
41529 break;
41531 case 2:
41532 tmp = gen_reg_rtx (mode);
41533 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
41534 break;
41536 default:
41537 gcc_unreachable ();
41539 vec = tmp;
41540 use_vec_extr = true;
41541 elt = 0;
41542 break;
41544 case V4SImode:
41545 use_vec_extr = TARGET_SSE4_1;
41546 if (use_vec_extr)
41547 break;
41549 if (TARGET_SSE2)
41551 switch (elt)
41553 case 0:
41554 tmp = vec;
41555 break;
41557 case 1:
41558 case 3:
41559 tmp = gen_reg_rtx (mode);
41560 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
41561 GEN_INT (elt), GEN_INT (elt),
41562 GEN_INT (elt), GEN_INT (elt)));
41563 break;
41565 case 2:
41566 tmp = gen_reg_rtx (mode);
41567 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
41568 break;
41570 default:
41571 gcc_unreachable ();
41573 vec = tmp;
41574 use_vec_extr = true;
41575 elt = 0;
41577 else
41579 /* For SSE1, we have to reuse the V4SF code. */
41580 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
41581 gen_lowpart (V4SFmode, vec), elt);
41582 return;
41584 break;
41586 case V8HImode:
41587 use_vec_extr = TARGET_SSE2;
41588 break;
41589 case V4HImode:
41590 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
41591 break;
41593 case V16QImode:
41594 use_vec_extr = TARGET_SSE4_1;
41595 break;
41597 case V8SFmode:
41598 if (TARGET_AVX)
41600 tmp = gen_reg_rtx (V4SFmode);
41601 if (elt < 4)
41602 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
41603 else
41604 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
41605 ix86_expand_vector_extract (false, target, tmp, elt & 3);
41606 return;
41608 break;
41610 case V4DFmode:
41611 if (TARGET_AVX)
41613 tmp = gen_reg_rtx (V2DFmode);
41614 if (elt < 2)
41615 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
41616 else
41617 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
41618 ix86_expand_vector_extract (false, target, tmp, elt & 1);
41619 return;
41621 break;
41623 case V32QImode:
41624 if (TARGET_AVX)
41626 tmp = gen_reg_rtx (V16QImode);
41627 if (elt < 16)
41628 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
41629 else
41630 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
41631 ix86_expand_vector_extract (false, target, tmp, elt & 15);
41632 return;
41634 break;
41636 case V16HImode:
41637 if (TARGET_AVX)
41639 tmp = gen_reg_rtx (V8HImode);
41640 if (elt < 8)
41641 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
41642 else
41643 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
41644 ix86_expand_vector_extract (false, target, tmp, elt & 7);
41645 return;
41647 break;
41649 case V8SImode:
41650 if (TARGET_AVX)
41652 tmp = gen_reg_rtx (V4SImode);
41653 if (elt < 4)
41654 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
41655 else
41656 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
41657 ix86_expand_vector_extract (false, target, tmp, elt & 3);
41658 return;
41660 break;
41662 case V4DImode:
41663 if (TARGET_AVX)
41665 tmp = gen_reg_rtx (V2DImode);
41666 if (elt < 2)
41667 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
41668 else
41669 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
41670 ix86_expand_vector_extract (false, target, tmp, elt & 1);
41671 return;
41673 break;
41675 case V16SFmode:
41676 tmp = gen_reg_rtx (V8SFmode);
41677 if (elt < 8)
41678 emit_insn (gen_vec_extract_lo_v16sf (tmp, vec));
41679 else
41680 emit_insn (gen_vec_extract_hi_v16sf (tmp, vec));
41681 ix86_expand_vector_extract (false, target, tmp, elt & 7);
41682 return;
41684 case V8DFmode:
41685 tmp = gen_reg_rtx (V4DFmode);
41686 if (elt < 4)
41687 emit_insn (gen_vec_extract_lo_v8df (tmp, vec));
41688 else
41689 emit_insn (gen_vec_extract_hi_v8df (tmp, vec));
41690 ix86_expand_vector_extract (false, target, tmp, elt & 3);
41691 return;
41693 case V16SImode:
41694 tmp = gen_reg_rtx (V8SImode);
41695 if (elt < 8)
41696 emit_insn (gen_vec_extract_lo_v16si (tmp, vec));
41697 else
41698 emit_insn (gen_vec_extract_hi_v16si (tmp, vec));
41699 ix86_expand_vector_extract (false, target, tmp, elt & 7);
41700 return;
41702 case V8DImode:
41703 tmp = gen_reg_rtx (V4DImode);
41704 if (elt < 4)
41705 emit_insn (gen_vec_extract_lo_v8di (tmp, vec));
41706 else
41707 emit_insn (gen_vec_extract_hi_v8di (tmp, vec));
41708 ix86_expand_vector_extract (false, target, tmp, elt & 3);
41709 return;
41711 case V8QImode:
41712 /* ??? Could extract the appropriate HImode element and shift. */
41713 default:
41714 break;
41717 if (use_vec_extr)
41719 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
41720 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
41722 /* Let the rtl optimizers know about the zero extension performed. */
41723 if (inner_mode == QImode || inner_mode == HImode)
41725 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
41726 target = gen_lowpart (SImode, target);
41729 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
41731 else
41733 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
41735 emit_move_insn (mem, vec);
41737 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
41738 emit_move_insn (target, tmp);
41742 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
41743 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
41744 The upper bits of DEST are undefined, though they shouldn't cause
41745 exceptions (some bits from src or all zeros are ok). */
41747 static void
41748 emit_reduc_half (rtx dest, rtx src, int i)
41750 rtx tem, d = dest;
41751 switch (GET_MODE (src))
41753 case V4SFmode:
41754 if (i == 128)
41755 tem = gen_sse_movhlps (dest, src, src);
41756 else
41757 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
41758 GEN_INT (1 + 4), GEN_INT (1 + 4));
41759 break;
41760 case V2DFmode:
41761 tem = gen_vec_interleave_highv2df (dest, src, src);
41762 break;
41763 case V16QImode:
41764 case V8HImode:
41765 case V4SImode:
41766 case V2DImode:
41767 d = gen_reg_rtx (V1TImode);
41768 tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
41769 GEN_INT (i / 2));
41770 break;
41771 case V8SFmode:
41772 if (i == 256)
41773 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
41774 else
41775 tem = gen_avx_shufps256 (dest, src, src,
41776 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
41777 break;
41778 case V4DFmode:
41779 if (i == 256)
41780 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
41781 else
41782 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
41783 break;
41784 case V32QImode:
41785 case V16HImode:
41786 case V8SImode:
41787 case V4DImode:
41788 if (i == 256)
41790 if (GET_MODE (dest) != V4DImode)
41791 d = gen_reg_rtx (V4DImode);
41792 tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src),
41793 gen_lowpart (V4DImode, src),
41794 const1_rtx);
41796 else
41798 d = gen_reg_rtx (V2TImode);
41799 tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src),
41800 GEN_INT (i / 2));
41802 break;
41803 case V16SImode:
41804 case V16SFmode:
41805 case V8DImode:
41806 case V8DFmode:
41807 if (i > 128)
41808 tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
41809 gen_lowpart (V16SImode, src),
41810 gen_lowpart (V16SImode, src),
41811 GEN_INT (0x4 + (i == 512 ? 4 : 0)),
41812 GEN_INT (0x5 + (i == 512 ? 4 : 0)),
41813 GEN_INT (0x6 + (i == 512 ? 4 : 0)),
41814 GEN_INT (0x7 + (i == 512 ? 4 : 0)),
41815 GEN_INT (0xC), GEN_INT (0xD),
41816 GEN_INT (0xE), GEN_INT (0xF),
41817 GEN_INT (0x10), GEN_INT (0x11),
41818 GEN_INT (0x12), GEN_INT (0x13),
41819 GEN_INT (0x14), GEN_INT (0x15),
41820 GEN_INT (0x16), GEN_INT (0x17));
41821 else
41822 tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
41823 gen_lowpart (V16SImode, src),
41824 GEN_INT (i == 128 ? 0x2 : 0x1),
41825 GEN_INT (0x3),
41826 GEN_INT (0x3),
41827 GEN_INT (0x3),
41828 GEN_INT (i == 128 ? 0x6 : 0x5),
41829 GEN_INT (0x7),
41830 GEN_INT (0x7),
41831 GEN_INT (0x7),
41832 GEN_INT (i == 128 ? 0xA : 0x9),
41833 GEN_INT (0xB),
41834 GEN_INT (0xB),
41835 GEN_INT (0xB),
41836 GEN_INT (i == 128 ? 0xE : 0xD),
41837 GEN_INT (0xF),
41838 GEN_INT (0xF),
41839 GEN_INT (0xF));
41840 break;
41841 default:
41842 gcc_unreachable ();
41844 emit_insn (tem);
41845 if (d != dest)
41846 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
41849 /* Expand a vector reduction. FN is the binary pattern to reduce;
41850 DEST is the destination; IN is the input vector. */
41852 void
41853 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
41855 rtx half, dst, vec = in;
41856 enum machine_mode mode = GET_MODE (in);
41857 int i;
41859 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
41860 if (TARGET_SSE4_1
41861 && mode == V8HImode
41862 && fn == gen_uminv8hi3)
41864 emit_insn (gen_sse4_1_phminposuw (dest, in));
41865 return;
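/* Otherwise reduce pairwise: at each step move the upper half of the
   interesting bits of VEC into the lower half of a fresh register and
   combine the two with FN, halving the width of live data until the
   result lands in element 0.  */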
41868 for (i = GET_MODE_BITSIZE (mode);
41869 i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
41870 i >>= 1)
41872 half = gen_reg_rtx (mode);
41873 emit_reduc_half (half, vec, i);
41874 if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
41875 dst = dest;
41876 else
41877 dst = gen_reg_rtx (mode);
41878 emit_insn (fn (dst, half, vec));
41879 vec = dst;
41883 /* Target hook for scalar_mode_supported_p. */
41884 static bool
41885 ix86_scalar_mode_supported_p (enum machine_mode mode)
41887 if (DECIMAL_FLOAT_MODE_P (mode))
41888 return default_decimal_float_supported_p ();
41889 else if (mode == TFmode)
41890 return true;
41891 else
41892 return default_scalar_mode_supported_p (mode);
41895 /* Implements target hook vector_mode_supported_p. */
41896 static bool
41897 ix86_vector_mode_supported_p (enum machine_mode mode)
41899 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
41900 return true;
41901 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
41902 return true;
41903 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
41904 return true;
41905 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
41906 return true;
41907 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
41908 return true;
41909 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
41910 return true;
41911 return false;
41914 /* Target hook for c_mode_for_suffix. */
41915 static enum machine_mode
41916 ix86_c_mode_for_suffix (char suffix)
41918 if (suffix == 'q')
41919 return TFmode;
41920 if (suffix == 'w')
41921 return XFmode;
41923 return VOIDmode;
41926 /* Worker function for TARGET_MD_ASM_CLOBBERS.
41928 We do this in the new i386 backend to maintain source compatibility
41929 with the old cc0-based compiler. */
41931 static tree
41932 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
41933 tree inputs ATTRIBUTE_UNUSED,
41934 tree clobbers)
41936 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
41937 clobbers);
41938 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
41939 clobbers);
41940 return clobbers;
41943 /* Implements target vector targetm.asm.encode_section_info. */
41945 static void ATTRIBUTE_UNUSED
41946 ix86_encode_section_info (tree decl, rtx rtl, int first)
41948 default_encode_section_info (decl, rtl, first);
41950 if (TREE_CODE (decl) == VAR_DECL
41951 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
41952 && ix86_in_large_data_p (decl))
41953 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
41956 /* Worker function for REVERSE_CONDITION. */
41958 enum rtx_code
41959 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
41961 return (mode != CCFPmode && mode != CCFPUmode
41962 ? reverse_condition (code)
41963 : reverse_condition_maybe_unordered (code));
41966 /* Output code to perform an x87 FP register move, from OPERANDS[1]
41967 to OPERANDS[0]. */
41969 const char *
41970 output_387_reg_move (rtx insn, rtx *operands)
41972 if (REG_P (operands[0]))
41974 if (REG_P (operands[1])
41975 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
41977 if (REGNO (operands[0]) == FIRST_STACK_REG)
41978 return output_387_ffreep (operands, 0);
41979 return "fstp\t%y0";
41981 if (STACK_TOP_P (operands[0]))
41982 return "fld%Z1\t%y1";
41983 return "fst\t%y0";
41985 else if (MEM_P (operands[0]))
41987 gcc_assert (REG_P (operands[1]));
41988 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
41989 return "fstp%Z0\t%y0";
41990 else
41992 /* There is no non-popping store to memory for XFmode.
41993 So if we need one, follow the store with a load. */
41994 if (GET_MODE (operands[0]) == XFmode)
41995 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
41996 else
41997 return "fst%Z0\t%y0";
42000 else
42001 gcc_unreachable();
42004 /* Output code to perform a conditional jump to LABEL, if C2 flag in
42005 FP status register is set. */
42007 void
42008 ix86_emit_fp_unordered_jump (rtx label)
42010 rtx reg = gen_reg_rtx (HImode);
42011 rtx temp;
42013 emit_insn (gen_x86_fnstsw_1 (reg));
42015 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
42017 emit_insn (gen_x86_sahf_1 (reg));
42019 temp = gen_rtx_REG (CCmode, FLAGS_REG);
42020 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
42022 else
42024 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
42026 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
42027 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
42030 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
42031 gen_rtx_LABEL_REF (VOIDmode, label),
42032 pc_rtx);
42033 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
42035 emit_jump_insn (temp);
42036 predict_jump (REG_BR_PROB_BASE * 10 / 100);
42039 /* Output code to perform a log1p XFmode calculation. */
42041 void ix86_emit_i387_log1p (rtx op0, rtx op1)
42043 rtx label1 = gen_label_rtx ();
42044 rtx label2 = gen_label_rtx ();
42046 rtx tmp = gen_reg_rtx (XFmode);
42047 rtx tmp2 = gen_reg_rtx (XFmode);
42048 rtx test;
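/* fyl2xp1 is only defined for arguments of small magnitude; the threshold used
   below is approximately 1 - sqrt(2)/2, its documented argument bound.  Larger
   inputs go through the plain fyl2x path on 1 + op1 instead.  */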
42050 emit_insn (gen_absxf2 (tmp, op1));
42051 test = gen_rtx_GE (VOIDmode, tmp,
42052 CONST_DOUBLE_FROM_REAL_VALUE (
42053 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
42054 XFmode));
42055 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
42057 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
42058 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
42059 emit_jump (label2);
42061 emit_label (label1);
42062 emit_move_insn (tmp, CONST1_RTX (XFmode));
42063 emit_insn (gen_addxf3 (tmp, op1, tmp));
42064 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
42065 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
42067 emit_label (label2);
42070 /* Emit code for round calculation. */
42071 void ix86_emit_i387_round (rtx op0, rtx op1)
42073 enum machine_mode inmode = GET_MODE (op1);
42074 enum machine_mode outmode = GET_MODE (op0);
42075 rtx e1, e2, res, tmp, tmp1, half;
42076 rtx scratch = gen_reg_rtx (HImode);
42077 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
42078 rtx jump_label = gen_label_rtx ();
42079 rtx insn;
42080 rtx (*gen_abs) (rtx, rtx);
42081 rtx (*gen_neg) (rtx, rtx);
42083 switch (inmode)
42085 case SFmode:
42086 gen_abs = gen_abssf2;
42087 break;
42088 case DFmode:
42089 gen_abs = gen_absdf2;
42090 break;
42091 case XFmode:
42092 gen_abs = gen_absxf2;
42093 break;
42094 default:
42095 gcc_unreachable ();
42098 switch (outmode)
42100 case SFmode:
42101 gen_neg = gen_negsf2;
42102 break;
42103 case DFmode:
42104 gen_neg = gen_negdf2;
42105 break;
42106 case XFmode:
42107 gen_neg = gen_negxf2;
42108 break;
42109 case HImode:
42110 gen_neg = gen_neghi2;
42111 break;
42112 case SImode:
42113 gen_neg = gen_negsi2;
42114 break;
42115 case DImode:
42116 gen_neg = gen_negdi2;
42117 break;
42118 default:
42119 gcc_unreachable ();
42122 e1 = gen_reg_rtx (inmode);
42123 e2 = gen_reg_rtx (inmode);
42124 res = gen_reg_rtx (outmode);
42126 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);
42128 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
42130 /* scratch = fxam(op1) */
42131 emit_insn (gen_rtx_SET (VOIDmode, scratch,
42132 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
42133 UNSPEC_FXAM)));
42134 /* e1 = fabs(op1) */
42135 emit_insn (gen_abs (e1, op1));
42137 /* e2 = e1 + 0.5 */
42138 half = force_reg (inmode, half);
42139 emit_insn (gen_rtx_SET (VOIDmode, e2,
42140 gen_rtx_PLUS (inmode, e1, half)));
42142 /* res = floor(e2) */
42143 if (inmode != XFmode)
42145 tmp1 = gen_reg_rtx (XFmode);
42147 emit_insn (gen_rtx_SET (VOIDmode, tmp1,
42148 gen_rtx_FLOAT_EXTEND (XFmode, e2)));
42150 else
42151 tmp1 = e2;
42153 switch (outmode)
42155 case SFmode:
42156 case DFmode:
42158 rtx tmp0 = gen_reg_rtx (XFmode);
42160 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
42162 emit_insn (gen_rtx_SET (VOIDmode, res,
42163 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
42164 UNSPEC_TRUNC_NOOP)));
42166 break;
42167 case XFmode:
42168 emit_insn (gen_frndintxf2_floor (res, tmp1));
42169 break;
42170 case HImode:
42171 emit_insn (gen_lfloorxfhi2 (res, tmp1));
42172 break;
42173 case SImode:
42174 emit_insn (gen_lfloorxfsi2 (res, tmp1));
42175 break;
42176 case DImode:
42177 emit_insn (gen_lfloorxfdi2 (res, tmp1));
42178 break;
42179 default:
42180 gcc_unreachable ();
42183 /* flags = signbit(a) */
42184 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
42186 /* if (flags) then res = -res */
42187 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
42188 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
42189 gen_rtx_LABEL_REF (VOIDmode, jump_label),
42190 pc_rtx);
42191 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
42192 predict_jump (REG_BR_PROB_BASE * 50 / 100);
42193 JUMP_LABEL (insn) = jump_label;
42195 emit_insn (gen_neg (res, res));
42197 emit_label (jump_label);
42198 LABEL_NUSES (jump_label) = 1;
42200 emit_move_insn (op0, res);
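/* A minimal C model of the expansion above (illustrative; assumes <math.h>
   and the default rounding mode):

     double i387_round_model (double a)
     {
       double r = floor (fabs (a) + 0.5);   // fabs of the input, plus 0.5, floored
       return signbit (a) ? -r : r;         // sign taken from the fxam C1 bit
     }

   For the integer output modes the floor step is the lfloorxf*2 pattern
   instead of frndint.  */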
42203 /* Output code to perform a Newton-Raphson approximation of a single precision
42204 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
42206 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
42208 rtx x0, x1, e0, e1;
42210 x0 = gen_reg_rtx (mode);
42211 e0 = gen_reg_rtx (mode);
42212 e1 = gen_reg_rtx (mode);
42213 x1 = gen_reg_rtx (mode);
42215 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
42217 b = force_reg (mode, b);
42219 /* x0 = rcp(b) estimate */
42220 if (mode == V16SFmode || mode == V8DFmode)
42221 emit_insn (gen_rtx_SET (VOIDmode, x0,
42222 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
42223 UNSPEC_RCP14)));
42224 else
42225 emit_insn (gen_rtx_SET (VOIDmode, x0,
42226 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
42227 UNSPEC_RCP)));
42229 /* e0 = x0 * b */
42230 emit_insn (gen_rtx_SET (VOIDmode, e0,
42231 gen_rtx_MULT (mode, x0, b)));
42233 /* e0 = x0 * e0 */
42234 emit_insn (gen_rtx_SET (VOIDmode, e0,
42235 gen_rtx_MULT (mode, x0, e0)));
42237 /* e1 = x0 + x0 */
42238 emit_insn (gen_rtx_SET (VOIDmode, e1,
42239 gen_rtx_PLUS (mode, x0, x0)));
42241 /* x1 = e1 - e0 */
42242 emit_insn (gen_rtx_SET (VOIDmode, x1,
42243 gen_rtx_MINUS (mode, e1, e0)));
42245 /* res = a * x1 */
42246 emit_insn (gen_rtx_SET (VOIDmode, res,
42247 gen_rtx_MULT (mode, a, x1)));
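/* Scalar model of the refinement above, where x0 stands for the rcpps /
   rcp14 estimate of 1/b (illustrative sketch only):

     float swdiv_model (float a, float b, float x0)
     {
       float e0 = x0 * b;     // b * x0
       e0 = x0 * e0;          // b * x0 * x0
       float e1 = x0 + x0;    // 2 * x0
       float x1 = e1 - e0;    // x0 * (2 - b * x0): one Newton-Raphson step
       return a * x1;         // roughly doubles the estimate's accuracy
     }
*/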
42250 /* Output code to perform a Newton-Raphson approximation of a
42251 single precision floating point [reciprocal] square root. */
42253 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
42254 bool recip)
42256 rtx x0, e0, e1, e2, e3, mthree, mhalf;
42257 REAL_VALUE_TYPE r;
42258 int unspec;
42260 x0 = gen_reg_rtx (mode);
42261 e0 = gen_reg_rtx (mode);
42262 e1 = gen_reg_rtx (mode);
42263 e2 = gen_reg_rtx (mode);
42264 e3 = gen_reg_rtx (mode);
42266 real_from_integer (&r, VOIDmode, -3, -1, 0);
42267 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
42269 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
42270 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
42271 unspec = UNSPEC_RSQRT;
42273 if (VECTOR_MODE_P (mode))
42275 mthree = ix86_build_const_vector (mode, true, mthree);
42276 mhalf = ix86_build_const_vector (mode, true, mhalf);
42277 /* There is no 512-bit rsqrt. There is however rsqrt14. */
42278 if (GET_MODE_SIZE (mode) == 64)
42279 unspec = UNSPEC_RSQRT14;
42282 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
42283 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
42285 a = force_reg (mode, a);
42287 /* x0 = rsqrt(a) estimate */
42288 emit_insn (gen_rtx_SET (VOIDmode, x0,
42289 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
42290 unspec)));
42292 /* If a == 0.0, filter out the infinite rsqrt estimate to avoid a NaN result for sqrt (0.0). */
42293 if (!recip)
42295 rtx zero, mask;
42297 zero = gen_reg_rtx (mode);
42298 mask = gen_reg_rtx (mode);
42300 zero = force_reg (mode, CONST0_RTX(mode));
42302 /* Handle masked compare. */
42303 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
42305 mask = gen_reg_rtx (HImode);
42306 /* Imm value 0x4 corresponds to not-equal comparison. */
42307 emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4)));
42308 emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask));
42310 else
42312 emit_insn (gen_rtx_SET (VOIDmode, mask,
42313 gen_rtx_NE (mode, zero, a)));
42315 emit_insn (gen_rtx_SET (VOIDmode, x0,
42316 gen_rtx_AND (mode, x0, mask)));
42320 /* e0 = x0 * a */
42321 emit_insn (gen_rtx_SET (VOIDmode, e0,
42322 gen_rtx_MULT (mode, x0, a)));
42323 /* e1 = e0 * x0 */
42324 emit_insn (gen_rtx_SET (VOIDmode, e1,
42325 gen_rtx_MULT (mode, e0, x0)));
42327 /* e2 = e1 - 3. */
42328 mthree = force_reg (mode, mthree);
42329 emit_insn (gen_rtx_SET (VOIDmode, e2,
42330 gen_rtx_PLUS (mode, e1, mthree)));
42332 mhalf = force_reg (mode, mhalf);
42333 if (recip)
42334 /* e3 = -.5 * x0 */
42335 emit_insn (gen_rtx_SET (VOIDmode, e3,
42336 gen_rtx_MULT (mode, x0, mhalf)));
42337 else
42338 /* e3 = -.5 * e0 */
42339 emit_insn (gen_rtx_SET (VOIDmode, e3,
42340 gen_rtx_MULT (mode, e0, mhalf)));
42341 /* ret = e2 * e3 */
42342 emit_insn (gen_rtx_SET (VOIDmode, res,
42343 gen_rtx_MULT (mode, e2, e3)));
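/* Scalar model of the sequence above, with x0 standing for the rsqrtss /
   rsqrt14 estimate of 1/sqrt(a) (illustrative sketch only):

     float swsqrt_model (float a, float x0, int recip)
     {
       float e0 = x0 * a;
       float e1 = e0 * x0;                    // a * x0 * x0
       float e2 = e1 - 3.0f;
       float e3 = (recip ? x0 : e0) * -0.5f;
       return e2 * e3;                        // one Newton-Raphson step
     }

   For recip this is 0.5 * x0 * (3 - a*x0*x0), the standard rsqrt
   refinement; using e0 instead folds in the final multiply by a that
   turns 1/sqrt(a) into sqrt(a).  */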
42346 #ifdef TARGET_SOLARIS
42347 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
42349 static void
42350 i386_solaris_elf_named_section (const char *name, unsigned int flags,
42351 tree decl)
42353 /* With Binutils 2.15, the "@unwind" marker must be specified on
42354 every occurrence of the ".eh_frame" section, not just the first
42355 one. */
42356 if (TARGET_64BIT
42357 && strcmp (name, ".eh_frame") == 0)
42359 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
42360 flags & SECTION_WRITE ? "aw" : "a");
42361 return;
42364 #ifndef USE_GAS
42365 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
42367 solaris_elf_asm_comdat_section (name, flags, decl);
42368 return;
42370 #endif
42372 default_elf_asm_named_section (name, flags, decl);
42374 #endif /* TARGET_SOLARIS */
42376 /* Return the mangling of TYPE if it is an extended fundamental type. */
42378 static const char *
42379 ix86_mangle_type (const_tree type)
42381 type = TYPE_MAIN_VARIANT (type);
42383 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
42384 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
42385 return NULL;
42387 switch (TYPE_MODE (type))
42389 case TFmode:
42390 /* __float128 is "g". */
42391 return "g";
42392 case XFmode:
42393 /* "long double" or __float80 is "e". */
42394 return "e";
42395 default:
42396 return NULL;
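/* For example (illustrative), under the Itanium C++ ABI
     void f (__float128);     // mangles to _Z1fg
     void g (long double);    // mangles to _Z1ge
   the generic C++ mangler assembles the full names from the strings
   returned here.  */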
42400 /* For 32-bit code we can save PIC register setup by using
42401 __stack_chk_fail_local hidden function instead of calling
42402 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
42403 register, so it is better to call __stack_chk_fail directly. */
42405 static tree ATTRIBUTE_UNUSED
42406 ix86_stack_protect_fail (void)
42408 return TARGET_64BIT
42409 ? default_external_stack_protect_fail ()
42410 : default_hidden_stack_protect_fail ();
42413 /* Select a format to encode pointers in exception handling data. CODE
42414 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
42415 true if the symbol may be affected by dynamic relocations.
42417 ??? All x86 object file formats are capable of representing this.
42418 After all, the relocation needed is the same as for the call insn.
42419 Whether or not a particular assembler allows us to emit such a
42420 relocation, I guess we'll have to see. */
42421 int
42422 asm_preferred_eh_data_format (int code, int global)
42424 if (flag_pic)
42426 int type = DW_EH_PE_sdata8;
42427 if (!TARGET_64BIT
42428 || ix86_cmodel == CM_SMALL_PIC
42429 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
42430 type = DW_EH_PE_sdata4;
42431 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
42433 if (ix86_cmodel == CM_SMALL
42434 || (ix86_cmodel == CM_MEDIUM && code))
42435 return DW_EH_PE_udata4;
42436 return DW_EH_PE_absptr;
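/* Worked example (illustrative): 32-bit PIC code gets
   DW_EH_PE_pcrel | DW_EH_PE_sdata4, with DW_EH_PE_indirect added for
   global symbols; 64-bit small-model non-PIC code gets DW_EH_PE_udata4;
   everything else falls back to DW_EH_PE_absptr.  */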
42439 /* Expand copysign from SIGN to the positive value ABS_VALUE
42440 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
42441 the sign-bit. */
42442 static void
42443 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
42445 enum machine_mode mode = GET_MODE (sign);
42446 rtx sgn = gen_reg_rtx (mode);
42447 if (mask == NULL_RTX)
42449 enum machine_mode vmode;
42451 if (mode == SFmode)
42452 vmode = V4SFmode;
42453 else if (mode == DFmode)
42454 vmode = V2DFmode;
42455 else
42456 vmode = mode;
42458 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
42459 if (!VECTOR_MODE_P (mode))
42461 /* We need to generate a scalar mode mask in this case. */
42462 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
42463 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
42464 mask = gen_reg_rtx (mode);
42465 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
42468 else
42469 mask = gen_rtx_NOT (mode, mask);
42470 emit_insn (gen_rtx_SET (VOIDmode, sgn,
42471 gen_rtx_AND (mode, mask, sign)));
42472 emit_insn (gen_rtx_SET (VOIDmode, result,
42473 gen_rtx_IOR (mode, abs_value, sgn)));
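/* Bit-level model of the masking above for a scalar DFmode value
   (illustrative sketch; assumes <stdint.h> and <string.h>):

     double copysign_to_positive_model (double abs_value, double sign)
     {
       uint64_t a, s;
       memcpy (&a, &abs_value, sizeof a);
       memcpy (&s, &sign, sizeof s);
       a |= s & UINT64_C (0x8000000000000000);  // AND with sign mask, then IOR
       memcpy (&abs_value, &a, sizeof a);
       return abs_value;
     }
*/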
42476 /* Expand fabs (OP0) and return a new rtx that holds the result. The
42477 mask for masking out the sign-bit is stored in *SMASK, if that is
42478 non-null. */
42479 static rtx
42480 ix86_expand_sse_fabs (rtx op0, rtx *smask)
42482 enum machine_mode vmode, mode = GET_MODE (op0);
42483 rtx xa, mask;
42485 xa = gen_reg_rtx (mode);
42486 if (mode == SFmode)
42487 vmode = V4SFmode;
42488 else if (mode == DFmode)
42489 vmode = V2DFmode;
42490 else
42491 vmode = mode;
42492 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
42493 if (!VECTOR_MODE_P (mode))
42495 /* We need to generate a scalar mode mask in this case. */
42496 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
42497 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
42498 mask = gen_reg_rtx (mode);
42499 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
42501 emit_insn (gen_rtx_SET (VOIDmode, xa,
42502 gen_rtx_AND (mode, op0, mask)));
42504 if (smask)
42505 *smask = mask;
42507 return xa;
42510 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
42511 swapping the operands if SWAP_OPERANDS is true. The expanded
42512 code is a forward jump to a newly created label in case the
42513 comparison is true. The generated label rtx is returned. */
42514 static rtx
42515 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
42516 bool swap_operands)
42518 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
42519 rtx label, tmp;
42521 if (swap_operands)
42523 tmp = op0;
42524 op0 = op1;
42525 op1 = tmp;
42528 label = gen_label_rtx ();
42529 tmp = gen_rtx_REG (fpcmp_mode, FLAGS_REG);
42530 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42531 gen_rtx_COMPARE (fpcmp_mode, op0, op1)));
42532 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
42533 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
42534 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
42535 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
42536 JUMP_LABEL (tmp) = label;
42538 return label;
42541 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
42542 using comparison code CODE. Operands are swapped for the comparison if
42543 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
42544 static rtx
42545 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
42546 bool swap_operands)
42548 rtx (*insn)(rtx, rtx, rtx, rtx);
42549 enum machine_mode mode = GET_MODE (op0);
42550 rtx mask = gen_reg_rtx (mode);
42552 if (swap_operands)
42554 rtx tmp = op0;
42555 op0 = op1;
42556 op1 = tmp;
42559 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
42561 emit_insn (insn (mask, op0, op1,
42562 gen_rtx_fmt_ee (code, mode, op0, op1)));
42563 return mask;
42566 /* Generate and return a rtx of mode MODE for 2**n where n is the number
42567 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
42568 static rtx
42569 ix86_gen_TWO52 (enum machine_mode mode)
42571 REAL_VALUE_TYPE TWO52r;
42572 rtx TWO52;
42574 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
42575 TWO52 = const_double_from_real_value (TWO52r, mode);
42576 TWO52 = force_reg (mode, TWO52);
42578 return TWO52;
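/* The TWO52 constant implements the usual add/subtract rounding trick:
   for a nonnegative double x < 2**52 the sum x + 2**52 has a unit in the
   last place of 1.0, so the addition rounds x to an integer in the current
   rounding mode and the subtraction recovers it, e.g.
   (3.7 + 0x1p52) - 0x1p52 == 4.0 under round-to-nearest.  For SFmode the
   same holds with 2**23.  */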
42581 /* Expand SSE sequence for computing lround from OP1 storing
42582 into OP0. */
42583 void
42584 ix86_expand_lround (rtx op0, rtx op1)
42586 /* C code for the stuff we're doing below:
42587 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
42588 return (long)tmp;
42590 enum machine_mode mode = GET_MODE (op1);
42591 const struct real_format *fmt;
42592 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
42593 rtx adj;
42595 /* load nextafter (0.5, 0.0) */
42596 fmt = REAL_MODE_FORMAT (mode);
42597 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
42598 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
42600 /* adj = copysign (0.5, op1) */
42601 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
42602 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
42604 /* adj = op1 + adj */
42605 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
42607 /* op0 = (imode)adj */
42608 expand_fix (op0, adj, 0);
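/* Using nextafter (0.5, 0.0) rather than a literal 0.5 avoids the classic
   failure case (illustrative): with x = 0.49999999999999994,
   x + 0.5 rounds up to 1.0, while x + nextafter (0.5, 0.0) stays at
   0.9999999999999999, so the conversion correctly yields 0 rather than 1.  */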
42611 /* Expand SSE2 sequence for computing lfloor or lceil from OP1 storing
42612 into OP0. */
42613 void
42614 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
42616 /* C code for the stuff we're doing below (for do_floor):
42617 xi = (long)op1;
42618 xi -= (double)xi > op1 ? 1 : 0;
42619 return xi;
42621 enum machine_mode fmode = GET_MODE (op1);
42622 enum machine_mode imode = GET_MODE (op0);
42623 rtx ireg, freg, label, tmp;
42625 /* reg = (long)op1 */
42626 ireg = gen_reg_rtx (imode);
42627 expand_fix (ireg, op1, 0);
42629 /* freg = (double)reg */
42630 freg = gen_reg_rtx (fmode);
42631 expand_float (freg, ireg, 0);
42633 /* ireg = (freg > op1) ? ireg - 1 : ireg */
42634 label = ix86_expand_sse_compare_and_jump (UNLE,
42635 freg, op1, !do_floor);
42636 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
42637 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
42638 emit_move_insn (ireg, tmp);
42640 emit_label (label);
42641 LABEL_NUSES (label) = 1;
42643 emit_move_insn (op0, ireg);
42646 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
42647 result in OPERAND0. */
42648 void
42649 ix86_expand_rint (rtx operand0, rtx operand1)
42651 /* C code for the stuff we're doing below:
42652 xa = fabs (operand1);
42653 if (!isless (xa, 2**52))
42654 return operand1;
42655 xa = xa + 2**52 - 2**52;
42656 return copysign (xa, operand1);
42658 enum machine_mode mode = GET_MODE (operand0);
42659 rtx res, xa, label, TWO52, mask;
42661 res = gen_reg_rtx (mode);
42662 emit_move_insn (res, operand1);
42664 /* xa = abs (operand1) */
42665 xa = ix86_expand_sse_fabs (res, &mask);
42667 /* if (!isless (xa, TWO52)) goto label; */
42668 TWO52 = ix86_gen_TWO52 (mode);
42669 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42671 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
42672 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
42674 ix86_sse_copysign_to_positive (res, xa, res, mask);
42676 emit_label (label);
42677 LABEL_NUSES (label) = 1;
42679 emit_move_insn (operand0, res);
42682 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
42683 into OPERAND0. */
42684 void
42685 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
42687 /* C code for the stuff we expand below.
42688 double xa = fabs (x), x2;
42689 if (!isless (xa, TWO52))
42690 return x;
42691 xa = xa + TWO52 - TWO52;
42692 x2 = copysign (xa, x);
42693 Compensate. Floor:
42694 if (x2 > x)
42695 x2 -= 1;
42696 Compensate. Ceil:
42697 if (x2 < x)
42698 x2 -= -1;
42699 return x2;
42701 enum machine_mode mode = GET_MODE (operand0);
42702 rtx xa, TWO52, tmp, label, one, res, mask;
42704 TWO52 = ix86_gen_TWO52 (mode);
42706 /* Temporary for holding the result, initialized to the input
42707 operand to ease control flow. */
42708 res = gen_reg_rtx (mode);
42709 emit_move_insn (res, operand1);
42711 /* xa = abs (operand1) */
42712 xa = ix86_expand_sse_fabs (res, &mask);
42714 /* if (!isless (xa, TWO52)) goto label; */
42715 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42717 /* xa = xa + TWO52 - TWO52; */
42718 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
42719 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
42721 /* xa = copysign (xa, operand1) */
42722 ix86_sse_copysign_to_positive (xa, xa, res, mask);
42724 /* generate 1.0 or -1.0 */
42725 one = force_reg (mode,
42726 const_double_from_real_value (do_floor
42727 ? dconst1 : dconstm1, mode));
42729 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
42730 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
42731 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42732 gen_rtx_AND (mode, one, tmp)));
42733 /* We always need to subtract here to preserve signed zero. */
42734 tmp = expand_simple_binop (mode, MINUS,
42735 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42736 emit_move_insn (res, tmp);
42738 emit_label (label);
42739 LABEL_NUSES (label) = 1;
42741 emit_move_insn (operand0, res);
42744 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
42745 into OPERAND0. */
42746 void
42747 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
42749 /* C code for the stuff we expand below.
42750 double xa = fabs (x), x2;
42751 if (!isless (xa, TWO52))
42752 return x;
42753 x2 = (double)(long)x;
42754 Compensate. Floor:
42755 if (x2 > x)
42756 x2 -= 1;
42757 Compensate. Ceil:
42758 if (x2 < x)
42759 x2 += 1;
42760 if (HONOR_SIGNED_ZEROS (mode))
42761 return copysign (x2, x);
42762 return x2;
42764 enum machine_mode mode = GET_MODE (operand0);
42765 rtx xa, xi, TWO52, tmp, label, one, res, mask;
42767 TWO52 = ix86_gen_TWO52 (mode);
42769 /* Temporary for holding the result, initialized to the input
42770 operand to ease control flow. */
42771 res = gen_reg_rtx (mode);
42772 emit_move_insn (res, operand1);
42774 /* xa = abs (operand1) */
42775 xa = ix86_expand_sse_fabs (res, &mask);
42777 /* if (!isless (xa, TWO52)) goto label; */
42778 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42780 /* xa = (double)(long)x */
42781 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
42782 expand_fix (xi, res, 0);
42783 expand_float (xa, xi, 0);
42785 /* generate 1.0 */
42786 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
42788 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
42789 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
42790 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42791 gen_rtx_AND (mode, one, tmp)));
42792 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
42793 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42794 emit_move_insn (res, tmp);
42796 if (HONOR_SIGNED_ZEROS (mode))
42797 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
42799 emit_label (label);
42800 LABEL_NUSES (label) = 1;
42802 emit_move_insn (operand0, res);
42805 /* Expand SSE sequence for computing round from OPERAND1 storing
42806 into OPERAND0. Sequence that works without relying on DImode truncation
42807 via cvttsd2siq that is only available on 64bit targets. */
42808 void
42809 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
42811 /* C code for the stuff we expand below.
42812 double xa = fabs (x), xa2, x2;
42813 if (!isless (xa, TWO52))
42814 return x;
42815 Using the absolute value and copying the sign back makes
42816 -0.0 -> -0.0 correct.
42817 xa2 = xa + TWO52 - TWO52;
42818 Compensate.
42819 dxa = xa2 - xa;
42820 if (dxa <= -0.5)
42821 xa2 += 1;
42822 else if (dxa > 0.5)
42823 xa2 -= 1;
42824 x2 = copysign (xa2, x);
42825 return x2;
42827 enum machine_mode mode = GET_MODE (operand0);
42828 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
42830 TWO52 = ix86_gen_TWO52 (mode);
42832 /* Temporary for holding the result, initialized to the input
42833 operand to ease control flow. */
42834 res = gen_reg_rtx (mode);
42835 emit_move_insn (res, operand1);
42837 /* xa = abs (operand1) */
42838 xa = ix86_expand_sse_fabs (res, &mask);
42840 /* if (!isless (xa, TWO52)) goto label; */
42841 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42843 /* xa2 = xa + TWO52 - TWO52; */
42844 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
42845 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
42847 /* dxa = xa2 - xa; */
42848 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
42850 /* generate 0.5, 1.0 and -0.5 */
42851 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
42852 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
42853 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
42854 0, OPTAB_DIRECT);
42856 /* Compensate. */
42857 tmp = gen_reg_rtx (mode);
42858 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
42859 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
42860 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42861 gen_rtx_AND (mode, one, tmp)));
42862 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42863 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
42864 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
42865 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42866 gen_rtx_AND (mode, one, tmp)));
42867 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42869 /* res = copysign (xa2, operand1) */
42870 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
42872 emit_label (label);
42873 LABEL_NUSES (label) = 1;
42875 emit_move_insn (operand0, res);
42878 /* Expand SSE sequence for computing trunc from OPERAND1 storing
42879 into OPERAND0. */
42880 void
42881 ix86_expand_trunc (rtx operand0, rtx operand1)
42883 /* C code for SSE variant we expand below.
42884 double xa = fabs (x), x2;
42885 if (!isless (xa, TWO52))
42886 return x;
42887 x2 = (double)(long)x;
42888 if (HONOR_SIGNED_ZEROS (mode))
42889 return copysign (x2, x);
42890 return x2;
42892 enum machine_mode mode = GET_MODE (operand0);
42893 rtx xa, xi, TWO52, label, res, mask;
42895 TWO52 = ix86_gen_TWO52 (mode);
42897 /* Temporary for holding the result, initialized to the input
42898 operand to ease control flow. */
42899 res = gen_reg_rtx (mode);
42900 emit_move_insn (res, operand1);
42902 /* xa = abs (operand1) */
42903 xa = ix86_expand_sse_fabs (res, &mask);
42905 /* if (!isless (xa, TWO52)) goto label; */
42906 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42908 /* x = (double)(long)x */
42909 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
42910 expand_fix (xi, res, 0);
42911 expand_float (res, xi, 0);
42913 if (HONOR_SIGNED_ZEROS (mode))
42914 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
42916 emit_label (label);
42917 LABEL_NUSES (label) = 1;
42919 emit_move_insn (operand0, res);
42922 /* Expand SSE sequence for computing trunc from OPERAND1 storing
42923 into OPERAND0. */
42924 void
42925 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
42927 enum machine_mode mode = GET_MODE (operand0);
42928 rtx xa, mask, TWO52, label, one, res, smask, tmp;
42930 /* C code for SSE variant we expand below.
42931 double xa = fabs (x), x2;
42932 if (!isless (xa, TWO52))
42933 return x;
42934 xa2 = xa + TWO52 - TWO52;
42935 Compensate:
42936 if (xa2 > xa)
42937 xa2 -= 1.0;
42938 x2 = copysign (xa2, x);
42939 return x2;
42942 TWO52 = ix86_gen_TWO52 (mode);
42944 /* Temporary for holding the result, initialized to the input
42945 operand to ease control flow. */
42946 res = gen_reg_rtx (mode);
42947 emit_move_insn (res, operand1);
42949 /* xa = abs (operand1) */
42950 xa = ix86_expand_sse_fabs (res, &smask);
42952 /* if (!isless (xa, TWO52)) goto label; */
42953 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42955 /* res = xa + TWO52 - TWO52; */
42956 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
42957 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
42958 emit_move_insn (res, tmp);
42960 /* generate 1.0 */
42961 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
42963 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
42964 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
42965 emit_insn (gen_rtx_SET (VOIDmode, mask,
42966 gen_rtx_AND (mode, mask, one)));
42967 tmp = expand_simple_binop (mode, MINUS,
42968 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
42969 emit_move_insn (res, tmp);
42971 /* res = copysign (res, operand1) */
42972 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
42974 emit_label (label);
42975 LABEL_NUSES (label) = 1;
42977 emit_move_insn (operand0, res);
42980 /* Expand SSE sequence for computing round from OPERAND1 storing
42981 into OPERAND0. */
42982 void
42983 ix86_expand_round (rtx operand0, rtx operand1)
42985 /* C code for the stuff we're doing below:
42986 double xa = fabs (x);
42987 if (!isless (xa, TWO52))
42988 return x;
42989 xa = (double)(long)(xa + nextafter (0.5, 0.0));
42990 return copysign (xa, x);
42992 enum machine_mode mode = GET_MODE (operand0);
42993 rtx res, TWO52, xa, label, xi, half, mask;
42994 const struct real_format *fmt;
42995 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
42997 /* Temporary for holding the result, initialized to the input
42998 operand to ease control flow. */
42999 res = gen_reg_rtx (mode);
43000 emit_move_insn (res, operand1);
43002 TWO52 = ix86_gen_TWO52 (mode);
43003 xa = ix86_expand_sse_fabs (res, &mask);
43004 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
43006 /* load nextafter (0.5, 0.0) */
43007 fmt = REAL_MODE_FORMAT (mode);
43008 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
43009 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
43011 /* xa = xa + 0.5 */
43012 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
43013 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
43015 /* xa = (double)(int64_t)xa */
43016 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
43017 expand_fix (xi, xa, 0);
43018 expand_float (xa, xi, 0);
43020 /* res = copysign (xa, operand1) */
43021 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
43023 emit_label (label);
43024 LABEL_NUSES (label) = 1;
43026 emit_move_insn (operand0, res);
43029 /* Expand SSE sequence for computing round
43030 from OP1 storing into OP0 using sse4 round insn. */
43031 void
43032 ix86_expand_round_sse4 (rtx op0, rtx op1)
43034 enum machine_mode mode = GET_MODE (op0);
43035 rtx e1, e2, res, half;
43036 const struct real_format *fmt;
43037 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
43038 rtx (*gen_copysign) (rtx, rtx, rtx);
43039 rtx (*gen_round) (rtx, rtx, rtx);
43041 switch (mode)
43043 case SFmode:
43044 gen_copysign = gen_copysignsf3;
43045 gen_round = gen_sse4_1_roundsf2;
43046 break;
43047 case DFmode:
43048 gen_copysign = gen_copysigndf3;
43049 gen_round = gen_sse4_1_rounddf2;
43050 break;
43051 default:
43052 gcc_unreachable ();
43055 /* round (a) = trunc (a + copysign (0.5, a)) */
43057 /* load nextafter (0.5, 0.0) */
43058 fmt = REAL_MODE_FORMAT (mode);
43059 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
43060 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
43061 half = const_double_from_real_value (pred_half, mode);
43063 /* e1 = copysign (0.5, op1) */
43064 e1 = gen_reg_rtx (mode);
43065 emit_insn (gen_copysign (e1, half, op1));
43067 /* e2 = op1 + e1 */
43068 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
43070 /* res = trunc (e2) */
43071 res = gen_reg_rtx (mode);
43072 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
43074 emit_move_insn (op0, res);
43078 /* Table of valid machine attributes. */
43079 static const struct attribute_spec ix86_attribute_table[] =
43081 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
43082 affects_type_identity } */
43083 /* Stdcall attribute says callee is responsible for popping arguments
43084 if they are not variable. */
43085 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
43086 true },
43087 /* Fastcall attribute says callee is responsible for popping arguments
43088 if they are not variable. */
43089 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
43090 true },
43091 /* Thiscall attribute says callee is responsible for popping arguments
43092 if they are not variable. */
43093 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
43094 true },
43095 /* Cdecl attribute says the callee is a normal C declaration */
43096 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
43097 true },
43098 /* Regparm attribute specifies how many integer arguments are to be
43099 passed in registers. */
43100 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
43101 true },
43102 /* Sseregparm attribute says we are using x86_64 calling conventions
43103 for FP arguments. */
43104 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
43105 true },
43106 /* The transactional memory builtins are implicitly regparm or fastcall
43107 depending on the ABI. Override the generic do-nothing attribute that
43108 these builtins were declared with. */
43109 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
43110 true },
43111 /* force_align_arg_pointer says this function realigns the stack at entry. */
43112 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
43113 false, true, true, ix86_handle_cconv_attribute, false },
43114 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
43115 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
43116 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
43117 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
43118 false },
43119 #endif
43120 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
43121 false },
43122 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
43123 false },
43124 #ifdef SUBTARGET_ATTRIBUTE_TABLE
43125 SUBTARGET_ATTRIBUTE_TABLE,
43126 #endif
43127 /* ms_abi and sysv_abi calling convention function attributes. */
43128 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
43129 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
43130 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
43131 false },
43132 { "callee_pop_aggregate_return", 1, 1, false, true, true,
43133 ix86_handle_callee_pop_aggregate_return, true },
43134 /* End element. */
43135 { NULL, 0, 0, false, false, false, NULL, false }
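/* Example uses of the calling-convention attributes above (illustrative):

     int __attribute__((regparm (3))) mul3 (int a, int b, int c);
     void __attribute__((fastcall)) cb (void *data);
     struct __attribute__((ms_struct)) pk { char c; int i; };

   regparm (3) passes up to three integer arguments in EAX/EDX/ECX on
   32-bit targets; fastcall passes the first two in ECX/EDX with the
   callee popping its stack arguments; ms_struct selects the Microsoft
   struct/bit-field layout rules.  */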
43138 /* Implement targetm.vectorize.builtin_vectorization_cost. */
43139 static int
43140 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
43141 tree vectype,
43142 int misalign ATTRIBUTE_UNUSED)
43144 unsigned elements;
43146 switch (type_of_cost)
43148 case scalar_stmt:
43149 return ix86_cost->scalar_stmt_cost;
43151 case scalar_load:
43152 return ix86_cost->scalar_load_cost;
43154 case scalar_store:
43155 return ix86_cost->scalar_store_cost;
43157 case vector_stmt:
43158 return ix86_cost->vec_stmt_cost;
43160 case vector_load:
43161 return ix86_cost->vec_align_load_cost;
43163 case vector_store:
43164 return ix86_cost->vec_store_cost;
43166 case vec_to_scalar:
43167 return ix86_cost->vec_to_scalar_cost;
43169 case scalar_to_vec:
43170 return ix86_cost->scalar_to_vec_cost;
43172 case unaligned_load:
43173 case unaligned_store:
43174 return ix86_cost->vec_unalign_load_cost;
43176 case cond_branch_taken:
43177 return ix86_cost->cond_taken_branch_cost;
43179 case cond_branch_not_taken:
43180 return ix86_cost->cond_not_taken_branch_cost;
43182 case vec_perm:
43183 case vec_promote_demote:
43184 return ix86_cost->vec_stmt_cost;
43186 case vec_construct:
43187 elements = TYPE_VECTOR_SUBPARTS (vectype);
43188 return elements / 2 + 1;
43190 default:
43191 gcc_unreachable ();
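/* For example (illustrative), a vec_construct of a V4SF from four scalars
   is costed at 4 / 2 + 1 = 3, independently of the per-ISA entries in the
   cost tables used for the other cases.  */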
43195 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
43196 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
43197 insn every time. */
43199 static GTY(()) rtx vselect_insn;
43201 /* Initialize vselect_insn. */
43203 static void
43204 init_vselect_insn (void)
43206 unsigned i;
43207 rtx x;
43209 x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
43210 for (i = 0; i < MAX_VECT_LEN; ++i)
43211 XVECEXP (x, 0, i) = const0_rtx;
43212 x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
43213 const0_rtx), x);
43214 x = gen_rtx_SET (VOIDmode, const0_rtx, x);
43215 start_sequence ();
43216 vselect_insn = emit_insn (x);
43217 end_sequence ();
43220 /* Construct (set target (vec_select op0 (parallel perm))) and
43221 return true if that's a valid instruction in the active ISA. */
43223 static bool
43224 expand_vselect (rtx target, rtx op0, const unsigned char *perm,
43225 unsigned nelt, bool testing_p)
43227 unsigned int i;
43228 rtx x, save_vconcat;
43229 int icode;
43231 if (vselect_insn == NULL_RTX)
43232 init_vselect_insn ();
43234 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
43235 PUT_NUM_ELEM (XVEC (x, 0), nelt);
43236 for (i = 0; i < nelt; ++i)
43237 XVECEXP (x, 0, i) = GEN_INT (perm[i]);
43238 save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
43239 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
43240 PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
43241 SET_DEST (PATTERN (vselect_insn)) = target;
43242 icode = recog_memoized (vselect_insn);
43244 if (icode >= 0 && !testing_p)
43245 emit_insn (copy_rtx (PATTERN (vselect_insn)));
43247 SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
43248 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
43249 INSN_CODE (vselect_insn) = -1;
43251 return icode >= 0;
43254 /* Similar, but generate a vec_concat from op0 and op1 as well. */
43256 static bool
43257 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
43258 const unsigned char *perm, unsigned nelt,
43259 bool testing_p)
43261 enum machine_mode v2mode;
43262 rtx x;
43263 bool ok;
43265 if (vselect_insn == NULL_RTX)
43266 init_vselect_insn ();
43268 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
43269 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
43270 PUT_MODE (x, v2mode);
43271 XEXP (x, 0) = op0;
43272 XEXP (x, 1) = op1;
43273 ok = expand_vselect (target, x, perm, nelt, testing_p);
43274 XEXP (x, 0) = const0_rtx;
43275 XEXP (x, 1) = const0_rtx;
43276 return ok;
43279 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
43280 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
43282 static bool
43283 expand_vec_perm_blend (struct expand_vec_perm_d *d)
43285 enum machine_mode vmode = d->vmode;
43286 unsigned i, mask, nelt = d->nelt;
43287 rtx target, op0, op1, x;
43288 rtx rperm[32], vperm;
43290 if (d->one_operand_p)
43291 return false;
43292 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
43294 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
43296 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
43298 else
43299 return false;
43301 /* This is a blend, not a permute. Elements must stay in their
43302 respective lanes. */
43303 for (i = 0; i < nelt; ++i)
43305 unsigned e = d->perm[i];
43306 if (!(e == i || e == i + nelt))
43307 return false;
43310 if (d->testing_p)
43311 return true;
43313 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
43314 decision should be extracted elsewhere, so that we only try that
43315 sequence once all budget==3 options have been tried. */
43316 target = d->target;
43317 op0 = d->op0;
43318 op1 = d->op1;
43319 mask = 0;
43321 switch (vmode)
43323 case V4DFmode:
43324 case V8SFmode:
43325 case V2DFmode:
43326 case V4SFmode:
43327 case V8HImode:
43328 case V8SImode:
43329 for (i = 0; i < nelt; ++i)
43330 mask |= (d->perm[i] >= nelt) << i;
43331 break;
43333 case V2DImode:
43334 for (i = 0; i < 2; ++i)
43335 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
43336 vmode = V8HImode;
43337 goto do_subreg;
43339 case V4SImode:
43340 for (i = 0; i < 4; ++i)
43341 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
43342 vmode = V8HImode;
43343 goto do_subreg;
43345 case V16QImode:
43346 /* See if bytes move in pairs so we can use pblendw with
43347 an immediate argument, rather than pblendvb with a vector
43348 argument. */
43349 for (i = 0; i < 16; i += 2)
43350 if (d->perm[i] + 1 != d->perm[i + 1])
43352 use_pblendvb:
43353 for (i = 0; i < nelt; ++i)
43354 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
43356 finish_pblendvb:
43357 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
43358 vperm = force_reg (vmode, vperm);
43360 if (GET_MODE_SIZE (vmode) == 16)
43361 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
43362 else
43363 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
43364 if (target != d->target)
43365 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
43366 return true;
43369 for (i = 0; i < 8; ++i)
43370 mask |= (d->perm[i * 2] >= 16) << i;
43371 vmode = V8HImode;
43372 /* FALLTHRU */
43374 do_subreg:
43375 target = gen_reg_rtx (vmode);
43376 op0 = gen_lowpart (vmode, op0);
43377 op1 = gen_lowpart (vmode, op1);
43378 break;
43380 case V32QImode:
43381 /* See if bytes move in pairs. If not, vpblendvb must be used. */
43382 for (i = 0; i < 32; i += 2)
43383 if (d->perm[i] + 1 != d->perm[i + 1])
43384 goto use_pblendvb;
43385 /* See if bytes move in quadruplets. If yes, vpblendd
43386 with immediate can be used. */
43387 for (i = 0; i < 32; i += 4)
43388 if (d->perm[i] + 2 != d->perm[i + 2])
43389 break;
43390 if (i < 32)
43392 /* See if bytes move the same in both lanes. If yes,
43393 vpblendw with immediate can be used. */
43394 for (i = 0; i < 16; i += 2)
43395 if (d->perm[i] + 16 != d->perm[i + 16])
43396 goto use_pblendvb;
43398 /* Use vpblendw. */
43399 for (i = 0; i < 16; ++i)
43400 mask |= (d->perm[i * 2] >= 32) << i;
43401 vmode = V16HImode;
43402 goto do_subreg;
43405 /* Use vpblendd. */
43406 for (i = 0; i < 8; ++i)
43407 mask |= (d->perm[i * 4] >= 32) << i;
43408 vmode = V8SImode;
43409 goto do_subreg;
43411 case V16HImode:
43412 /* See if words move in pairs. If yes, vpblendd can be used. */
43413 for (i = 0; i < 16; i += 2)
43414 if (d->perm[i] + 1 != d->perm[i + 1])
43415 break;
43416 if (i < 16)
43418 /* See if words move the same in both lanes. If not,
43419 vpblendvb must be used. */
43420 for (i = 0; i < 8; i++)
43421 if (d->perm[i] + 8 != d->perm[i + 8])
43423 /* Use vpblendvb. */
43424 for (i = 0; i < 32; ++i)
43425 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
43427 vmode = V32QImode;
43428 nelt = 32;
43429 target = gen_reg_rtx (vmode);
43430 op0 = gen_lowpart (vmode, op0);
43431 op1 = gen_lowpart (vmode, op1);
43432 goto finish_pblendvb;
43435 /* Use vpblendw. */
43436 for (i = 0; i < 16; ++i)
43437 mask |= (d->perm[i] >= 16) << i;
43438 break;
43441 /* Use vpblendd. */
43442 for (i = 0; i < 8; ++i)
43443 mask |= (d->perm[i * 2] >= 16) << i;
43444 vmode = V8SImode;
43445 goto do_subreg;
43447 case V4DImode:
43448 /* Use vpblendd. */
43449 for (i = 0; i < 4; ++i)
43450 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
43451 vmode = V8SImode;
43452 goto do_subreg;
43454 default:
43455 gcc_unreachable ();
43458 /* This matches five different patterns with the different modes. */
43459 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
43460 x = gen_rtx_SET (VOIDmode, target, x);
43461 emit_insn (x);
43462 if (target != d->target)
43463 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
43465 return true;
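/* Worked example (illustrative): for V4SFmode and the two-operand
   permutation {0, 5, 6, 3}, elements 1 and 2 come from op1, so the loop
   above builds mask = 0b0110 = 6, which becomes the blendps immediate.  */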
43468 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
43469 in terms of the variable form of vpermilps.
43471 Note that we will have already failed the immediate input vpermilps,
43472 which requires that the high and low part shuffle be identical; the
43473 variable form doesn't require that. */
43475 static bool
43476 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
43478 rtx rperm[8], vperm;
43479 unsigned i;
43481 if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
43482 return false;
43484 /* We can only permute within the 128-bit lane. */
43485 for (i = 0; i < 8; ++i)
43487 unsigned e = d->perm[i];
43488 if (i < 4 ? e >= 4 : e < 4)
43489 return false;
43492 if (d->testing_p)
43493 return true;
43495 for (i = 0; i < 8; ++i)
43497 unsigned e = d->perm[i];
43499 /* Within each 128-bit lane, the elements of op0 are numbered
43500 from 0 and the elements of op1 are numbered from 4. */
43501 if (e >= 8 + 4)
43502 e -= 8;
43503 else if (e >= 4)
43504 e -= 4;
43506 rperm[i] = GEN_INT (e);
43509 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
43510 vperm = force_reg (V8SImode, vperm);
43511 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
43513 return true;
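/* Worked example (illustrative): the one-operand V8SF permutation
   {1,0,3,2, 5,4,7,6} keeps every element in its 128-bit lane, and the
   loop above reduces it to the per-lane control {1,0,3,2, 1,0,3,2} used
   as the variable vpermilps operand.  */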
43516 /* Return true if permutation D can be performed as VMODE permutation
43517 instead. */
43519 static bool
43520 valid_perm_using_mode_p (enum machine_mode vmode, struct expand_vec_perm_d *d)
43522 unsigned int i, j, chunk;
43524 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
43525 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
43526 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
43527 return false;
43529 if (GET_MODE_NUNITS (vmode) >= d->nelt)
43530 return true;
43532 chunk = d->nelt / GET_MODE_NUNITS (vmode);
43533 for (i = 0; i < d->nelt; i += chunk)
43534 if (d->perm[i] & (chunk - 1))
43535 return false;
43536 else
43537 for (j = 1; j < chunk; ++j)
43538 if (d->perm[i] + j != d->perm[i + j])
43539 return false;
43541 return true;
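/* Worked example (illustrative): the V16QImode permutation
   { 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11 } moves bytes in aligned
   groups of four, so it is also valid as the V4SImode permutation
   {1,0,3,2} and can be handled by wider-element shuffles; groups that
   start misaligned or are not consecutive are rejected.  */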
43544 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
43545 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
43547 static bool
43548 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
43550 unsigned i, nelt, eltsz, mask;
43551 unsigned char perm[32];
43552 enum machine_mode vmode = V16QImode;
43553 rtx rperm[32], vperm, target, op0, op1;
43555 nelt = d->nelt;
43557 if (!d->one_operand_p)
43559 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
43561 if (TARGET_AVX2
43562 && valid_perm_using_mode_p (V2TImode, d))
43564 if (d->testing_p)
43565 return true;
43567 /* Use vperm2i128 insn. The pattern uses
43568 V4DImode instead of V2TImode. */
43569 target = d->target;
43570 if (d->vmode != V4DImode)
43571 target = gen_reg_rtx (V4DImode);
43572 op0 = gen_lowpart (V4DImode, d->op0);
43573 op1 = gen_lowpart (V4DImode, d->op1);
43574 rperm[0]
43575 = GEN_INT ((d->perm[0] / (nelt / 2))
43576 | ((d->perm[nelt / 2] / (nelt / 2)) * 16));
43577 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
43578 if (target != d->target)
43579 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
43580 return true;
43582 return false;
43585 else
43587 if (GET_MODE_SIZE (d->vmode) == 16)
43589 if (!TARGET_SSSE3)
43590 return false;
43592 else if (GET_MODE_SIZE (d->vmode) == 32)
43594 if (!TARGET_AVX2)
43595 return false;
43597 /* V4DImode should be already handled through
43598 expand_vselect by vpermq instruction. */
43599 gcc_assert (d->vmode != V4DImode);
43601 vmode = V32QImode;
43602 if (d->vmode == V8SImode
43603 || d->vmode == V16HImode
43604 || d->vmode == V32QImode)
43606 /* First see if vpermq can be used for
43607 V8SImode/V16HImode/V32QImode. */
43608 if (valid_perm_using_mode_p (V4DImode, d))
43610 for (i = 0; i < 4; i++)
43611 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
43612 if (d->testing_p)
43613 return true;
43614 target = gen_reg_rtx (V4DImode);
43615 if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
43616 perm, 4, false))
43618 emit_move_insn (d->target,
43619 gen_lowpart (d->vmode, target));
43620 return true;
43622 return false;
43625 /* Next see if vpermd can be used. */
43626 if (valid_perm_using_mode_p (V8SImode, d))
43627 vmode = V8SImode;
43629 /* Or if vpermps can be used. */
43630 else if (d->vmode == V8SFmode)
43631 vmode = V8SImode;
43633 if (vmode == V32QImode)
43635 /* vpshufb only works within lanes; it is not
43636 possible to shuffle bytes between the lanes. */
43637 for (i = 0; i < nelt; ++i)
43638 if ((d->perm[i] ^ i) & (nelt / 2))
43639 return false;
43642 else
43643 return false;
43646 if (d->testing_p)
43647 return true;
43649 if (vmode == V8SImode)
43650 for (i = 0; i < 8; ++i)
43651 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
43652 else
43654 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
43655 if (!d->one_operand_p)
43656 mask = 2 * nelt - 1;
43657 else if (vmode == V16QImode)
43658 mask = nelt - 1;
43659 else
43660 mask = nelt / 2 - 1;
43662 for (i = 0; i < nelt; ++i)
43664 unsigned j, e = d->perm[i] & mask;
43665 for (j = 0; j < eltsz; ++j)
43666 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
43670 vperm = gen_rtx_CONST_VECTOR (vmode,
43671 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
43672 vperm = force_reg (vmode, vperm);
43674 target = d->target;
43675 if (d->vmode != vmode)
43676 target = gen_reg_rtx (vmode);
43677 op0 = gen_lowpart (vmode, d->op0);
43678 if (d->one_operand_p)
43680 if (vmode == V16QImode)
43681 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
43682 else if (vmode == V32QImode)
43683 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
43684 else if (vmode == V8SFmode)
43685 emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
43686 else
43687 emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
43689 else
43691 op1 = gen_lowpart (vmode, d->op1);
43692 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
43694 if (target != d->target)
43695 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
43697 return true;
43700 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
43701 in a single instruction. */
43703 static bool
43704 expand_vec_perm_1 (struct expand_vec_perm_d *d)
43706 unsigned i, nelt = d->nelt;
43707 unsigned char perm2[MAX_VECT_LEN];
43709 /* Check plain VEC_SELECT first, because AVX has instructions that could
43710 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
43711 input where SEL+CONCAT may not. */
43712 if (d->one_operand_p)
43714 int mask = nelt - 1;
43715 bool identity_perm = true;
43716 bool broadcast_perm = true;
43718 for (i = 0; i < nelt; i++)
43720 perm2[i] = d->perm[i] & mask;
43721 if (perm2[i] != i)
43722 identity_perm = false;
43723 if (perm2[i])
43724 broadcast_perm = false;
43727 if (identity_perm)
43729 if (!d->testing_p)
43730 emit_move_insn (d->target, d->op0);
43731 return true;
43733 else if (broadcast_perm && TARGET_AVX2)
43735 /* Use vpbroadcast{b,w,d}. */
43736 rtx (*gen) (rtx, rtx) = NULL;
43737 switch (d->vmode)
43739 case V32QImode:
43740 gen = gen_avx2_pbroadcastv32qi_1;
43741 break;
43742 case V16HImode:
43743 gen = gen_avx2_pbroadcastv16hi_1;
43744 break;
43745 case V8SImode:
43746 gen = gen_avx2_pbroadcastv8si_1;
43747 break;
43748 case V16QImode:
43749 gen = gen_avx2_pbroadcastv16qi;
43750 break;
43751 case V8HImode:
43752 gen = gen_avx2_pbroadcastv8hi;
43753 break;
43754 case V8SFmode:
43755 gen = gen_avx2_vec_dupv8sf_1;
43756 break;
43757 /* For other modes prefer other shuffles this function creates. */
43758 default: break;
43760 if (gen != NULL)
43762 if (!d->testing_p)
43763 emit_insn (gen (d->target, d->op0));
43764 return true;
43768 if (expand_vselect (d->target, d->op0, perm2, nelt, d->testing_p))
43769 return true;
43771 /* There are plenty of patterns in sse.md that are written for
43772 SEL+CONCAT and are not replicated for a single op. Perhaps
43773 that should be changed, to avoid the nastiness here. */
43775 /* Recognize interleave style patterns, which means incrementing
43776 every other permutation operand. */
43777 for (i = 0; i < nelt; i += 2)
43779 perm2[i] = d->perm[i] & mask;
43780 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
43782 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
43783 d->testing_p))
43784 return true;
43786 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
43787 if (nelt >= 4)
43789 for (i = 0; i < nelt; i += 4)
43791 perm2[i + 0] = d->perm[i + 0] & mask;
43792 perm2[i + 1] = d->perm[i + 1] & mask;
43793 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
43794 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
43797 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
43798 d->testing_p))
43799 return true;
43803 /* Finally, try the fully general two operand permute. */
43804 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
43805 d->testing_p))
43806 return true;
43808 /* Recognize interleave style patterns with reversed operands. */
43809 if (!d->one_operand_p)
43811 for (i = 0; i < nelt; ++i)
43813 unsigned e = d->perm[i];
43814 if (e >= nelt)
43815 e -= nelt;
43816 else
43817 e += nelt;
43818 perm2[i] = e;
43821 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt,
43822 d->testing_p))
43823 return true;
43826 /* Try the SSE4.1 blend variable merge instructions. */
43827 if (expand_vec_perm_blend (d))
43828 return true;
43830 /* Try one of the AVX vpermil variable permutations. */
43831 if (expand_vec_perm_vpermil (d))
43832 return true;
43834 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
43835 vpshufb, vpermd, vpermps or vpermq variable permutation. */
43836 if (expand_vec_perm_pshufb (d))
43837 return true;
43839 /* Try the AVX512F vpermi2 instructions. */
43840 rtx vec[64];
43841 enum machine_mode mode = d->vmode;
43842 if (mode == V8DFmode)
43843 mode = V8DImode;
43844 else if (mode == V16SFmode)
43845 mode = V16SImode;
43846 for (i = 0; i < nelt; ++i)
43847 vec[i] = GEN_INT (d->perm[i]);
43848 rtx mask = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt, vec));
43849 if (ix86_expand_vec_perm_vpermi2 (d->target, d->op0, mask, d->op1))
43850 return true;
43852 return false;
43855 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
43856 in terms of a pair of pshuflw + pshufhw instructions. */
43858 static bool
43859 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
43861 unsigned char perm2[MAX_VECT_LEN];
43862 unsigned i;
43863 bool ok;
43865 if (d->vmode != V8HImode || !d->one_operand_p)
43866 return false;
43868 /* The two permutations only operate in 64-bit lanes. */
43869 for (i = 0; i < 4; ++i)
43870 if (d->perm[i] >= 4)
43871 return false;
43872 for (i = 4; i < 8; ++i)
43873 if (d->perm[i] < 4)
43874 return false;
43876 if (d->testing_p)
43877 return true;
43879 /* Emit the pshuflw. */
43880 memcpy (perm2, d->perm, 4);
43881 for (i = 4; i < 8; ++i)
43882 perm2[i] = i;
43883 ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
43884 gcc_assert (ok);
43886 /* Emit the pshufhw. */
43887 memcpy (perm2 + 4, d->perm + 4, 4);
43888 for (i = 0; i < 4; ++i)
43889 perm2[i] = i;
43890 ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
43891 gcc_assert (ok);
43893 return true;
43896 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43897 the permutation using the SSSE3 palignr instruction. This succeeds
43898 when all of the elements in PERM fit within one vector and we merely
43899 need to shift them down so that a single vector permutation has a
43900 chance to succeed. */
43902 static bool
43903 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
43905 unsigned i, nelt = d->nelt;
43906 unsigned min, max;
43907 bool in_order, ok;
43908 rtx shift, target;
43909 struct expand_vec_perm_d dcopy;
43911 /* Even with AVX, palignr only operates on 128-bit vectors. */
43912 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
43913 return false;
43915 min = nelt, max = 0;
43916 for (i = 0; i < nelt; ++i)
43918 unsigned e = d->perm[i];
43919 if (e < min)
43920 min = e;
43921 if (e > max)
43922 max = e;
43924 if (min == 0 || max - min >= nelt)
43925 return false;
43927 /* Given that we have SSSE3, we know we'll be able to implement the
43928 single operand permutation after the palignr with pshufb. */
43929 if (d->testing_p)
43930 return true;
43932 dcopy = *d;
43933 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
43934 target = gen_reg_rtx (TImode);
43935 emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, d->op1),
43936 gen_lowpart (TImode, d->op0), shift));
43938 dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
43939 dcopy.one_operand_p = true;
43941 in_order = true;
43942 for (i = 0; i < nelt; ++i)
43944 unsigned e = dcopy.perm[i] - min;
43945 if (e != i)
43946 in_order = false;
43947 dcopy.perm[i] = e;
43950 /* Test for the degenerate case where the alignment by itself
43951 produces the desired permutation. */
43952 if (in_order)
43954 emit_move_insn (d->target, dcopy.op0);
43955 return true;
43958 ok = expand_vec_perm_1 (&dcopy);
43959 gcc_assert (ok);
43961 return ok;
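/* Worked example (illustrative): for V4SImode and perm {2,3,4,5}, min is 2,
   so the palignr above shifts the op1:op0 concatenation down by two
   elements (64 bits); the residual permutation on the shifted vector is
   the identity, so the in_order fast path applies and no pshufb is
   needed.  */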
43964 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
43966 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43967 a two vector permutation into a single vector permutation by using
43968 an interleave operation to merge the vectors. */
43970 static bool
43971 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
43973 struct expand_vec_perm_d dremap, dfinal;
43974 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
43975 unsigned HOST_WIDE_INT contents;
43976 unsigned char remap[2 * MAX_VECT_LEN];
43977 rtx seq;
43978 bool ok, same_halves = false;
43980 if (GET_MODE_SIZE (d->vmode) == 16)
43982 if (d->one_operand_p)
43983 return false;
43985 else if (GET_MODE_SIZE (d->vmode) == 32)
43987 if (!TARGET_AVX)
43988 return false;
43989 /* For 32-byte modes allow even d->one_operand_p.
43990 The lack of cross-lane shuffling in some instructions
43991 might prevent a single insn shuffle. */
43992 dfinal = *d;
43993 dfinal.testing_p = true;
43994 /* If expand_vec_perm_interleave3 can expand this into
43995 a 3 insn sequence, give up and let it be expanded as
43996 3 insn sequence. While that is one insn longer,
43997 it doesn't need a memory operand and in the common
43998 case that both interleave low and high permutations
43999 with the same operands are adjacent needs 4 insns
44000 for both after CSE. */
44001 if (expand_vec_perm_interleave3 (&dfinal))
44002 return false;
44004 else
44005 return false;
44007 /* Examine from whence the elements come. */
44008 contents = 0;
44009 for (i = 0; i < nelt; ++i)
44010 contents |= ((unsigned HOST_WIDE_INT) 1) << d->perm[i];
44012 memset (remap, 0xff, sizeof (remap));
44013 dremap = *d;
44015 if (GET_MODE_SIZE (d->vmode) == 16)
44017 unsigned HOST_WIDE_INT h1, h2, h3, h4;
44019 /* Split the two input vectors into 4 halves. */
44020 h1 = (((unsigned HOST_WIDE_INT) 1) << nelt2) - 1;
44021 h2 = h1 << nelt2;
44022 h3 = h2 << nelt2;
44023 h4 = h3 << nelt2;
44025 /* If all elements come from the low halves, use interleave low; similarly
44026 for interleave high. If the elements come from mis-matched halves, we
44027 can use shufps for V4SF/V4SI or do a DImode shuffle. */
44028 if ((contents & (h1 | h3)) == contents)
44030 /* punpckl* */
44031 for (i = 0; i < nelt2; ++i)
44033 remap[i] = i * 2;
44034 remap[i + nelt] = i * 2 + 1;
44035 dremap.perm[i * 2] = i;
44036 dremap.perm[i * 2 + 1] = i + nelt;
44038 if (!TARGET_SSE2 && d->vmode == V4SImode)
44039 dremap.vmode = V4SFmode;
44041 else if ((contents & (h2 | h4)) == contents)
44043 /* punpckh* */
44044 for (i = 0; i < nelt2; ++i)
44046 remap[i + nelt2] = i * 2;
44047 remap[i + nelt + nelt2] = i * 2 + 1;
44048 dremap.perm[i * 2] = i + nelt2;
44049 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
44051 if (!TARGET_SSE2 && d->vmode == V4SImode)
44052 dremap.vmode = V4SFmode;
44054 else if ((contents & (h1 | h4)) == contents)
44056 /* shufps */
44057 for (i = 0; i < nelt2; ++i)
44059 remap[i] = i;
44060 remap[i + nelt + nelt2] = i + nelt2;
44061 dremap.perm[i] = i;
44062 dremap.perm[i + nelt2] = i + nelt + nelt2;
44064 if (nelt != 4)
44066 /* shufpd */
44067 dremap.vmode = V2DImode;
44068 dremap.nelt = 2;
44069 dremap.perm[0] = 0;
44070 dremap.perm[1] = 3;
44073 else if ((contents & (h2 | h3)) == contents)
44075 /* shufps */
44076 for (i = 0; i < nelt2; ++i)
44078 remap[i + nelt2] = i;
44079 remap[i + nelt] = i + nelt2;
44080 dremap.perm[i] = i + nelt2;
44081 dremap.perm[i + nelt2] = i + nelt;
44083 if (nelt != 4)
44085 /* shufpd */
44086 dremap.vmode = V2DImode;
44087 dremap.nelt = 2;
44088 dremap.perm[0] = 1;
44089 dremap.perm[1] = 2;
44092 else
44093 return false;
44095 else
44097 unsigned int nelt4 = nelt / 4, nzcnt = 0;
44098 unsigned HOST_WIDE_INT q[8];
44099 unsigned int nonzero_halves[4];
44101 /* Split the two input vectors into 8 quarters. */
44102 q[0] = (((unsigned HOST_WIDE_INT) 1) << nelt4) - 1;
44103 for (i = 1; i < 8; ++i)
44104 q[i] = q[0] << (nelt4 * i);
44105 for (i = 0; i < 4; ++i)
44106 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
44108 nonzero_halves[nzcnt] = i;
44109 ++nzcnt;
44112 if (nzcnt == 1)
44114 gcc_assert (d->one_operand_p);
44115 nonzero_halves[1] = nonzero_halves[0];
44116 same_halves = true;
44118 else if (d->one_operand_p)
44120 gcc_assert (nonzero_halves[0] == 0);
44121 gcc_assert (nonzero_halves[1] == 1);
44124 if (nzcnt <= 2)
44126 if (d->perm[0] / nelt2 == nonzero_halves[1])
44128 /* Attempt to increase the likelihood that dfinal
44129 shuffle will be intra-lane. */
44130 char tmph = nonzero_halves[0];
44131 nonzero_halves[0] = nonzero_halves[1];
44132 nonzero_halves[1] = tmph;
44135 /* vperm2f128 or vperm2i128. */
44136 for (i = 0; i < nelt2; ++i)
44138 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
44139 remap[i + nonzero_halves[0] * nelt2] = i;
44140 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
44141 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
44144 if (d->vmode != V8SFmode
44145 && d->vmode != V4DFmode
44146 && d->vmode != V8SImode)
44148 dremap.vmode = V8SImode;
44149 dremap.nelt = 8;
44150 for (i = 0; i < 4; ++i)
44152 dremap.perm[i] = i + nonzero_halves[0] * 4;
44153 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
44157 else if (d->one_operand_p)
44158 return false;
44159 else if (TARGET_AVX2
44160 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
44162 /* vpunpckl* */
44163 for (i = 0; i < nelt4; ++i)
44165 remap[i] = i * 2;
44166 remap[i + nelt] = i * 2 + 1;
44167 remap[i + nelt2] = i * 2 + nelt2;
44168 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
44169 dremap.perm[i * 2] = i;
44170 dremap.perm[i * 2 + 1] = i + nelt;
44171 dremap.perm[i * 2 + nelt2] = i + nelt2;
44172 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
44175 else if (TARGET_AVX2
44176 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
44178 /* vpunpckh* */
44179 for (i = 0; i < nelt4; ++i)
44181 remap[i + nelt4] = i * 2;
44182 remap[i + nelt + nelt4] = i * 2 + 1;
44183 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
44184 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
44185 dremap.perm[i * 2] = i + nelt4;
44186 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
44187 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
44188 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
44191 else
44192 return false;
44195 /* Use the remapping array set up above to move the elements from their
44196 swizzled locations into their final destinations. */
44197 dfinal = *d;
44198 for (i = 0; i < nelt; ++i)
44200 unsigned e = remap[d->perm[i]];
44201 gcc_assert (e < nelt);
44202 /* If same_halves is true, both halves of the remapped vector are the
44203 same. Avoid cross-lane accesses if possible. */
44204 if (same_halves && i >= nelt2)
44206 gcc_assert (e < nelt2);
44207 dfinal.perm[i] = e + nelt2;
44209 else
44210 dfinal.perm[i] = e;
44212 if (!d->testing_p)
44214 dremap.target = gen_reg_rtx (dremap.vmode);
44215 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
44217 dfinal.op1 = dfinal.op0;
44218 dfinal.one_operand_p = true;
44220 /* Test if the final remap can be done with a single insn. For V4SFmode or
44221 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
44222 start_sequence ();
44223 ok = expand_vec_perm_1 (&dfinal);
44224 seq = get_insns ();
44225 end_sequence ();
44227 if (!ok)
44228 return false;
44230 if (d->testing_p)
44231 return true;
44233 if (dremap.vmode != dfinal.vmode)
44235 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
44236 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
44239 ok = expand_vec_perm_1 (&dremap);
44240 gcc_assert (ok);
44242 emit_insn (seq);
44243 return true;
44246 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
44247 a single vector cross-lane permutation into vpermq followed
44248 by any of the single insn permutations. */
44250 static bool
44251 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
44253 struct expand_vec_perm_d dremap, dfinal;
44254 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
44255 unsigned contents[2];
44256 bool ok;
44258 if (!(TARGET_AVX2
44259 && (d->vmode == V32QImode || d->vmode == V16HImode)
44260 && d->one_operand_p))
44261 return false;
44263 contents[0] = 0;
44264 contents[1] = 0;
44265 for (i = 0; i < nelt2; ++i)
44267 contents[0] |= 1u << (d->perm[i] / nelt4);
44268 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
44271 for (i = 0; i < 2; ++i)
44273 unsigned int cnt = 0;
44274 for (j = 0; j < 4; ++j)
44275 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
44276 return false;
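/* Illustration (assumed example): for a V16HImode one-operand permutation
   whose low half only reads elements from quarters 0 and 3 and whose high
   half only reads quarters 1 and 2, contents[0] is 0x9 and contents[1] is
   0x6; each half touches at most two 64-bit quarters, so a single vpermq
   ({0, 3, 1, 2} here) can gather them before the in-lane shuffle.  */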
44279 if (d->testing_p)
44280 return true;
44282 dremap = *d;
44283 dremap.vmode = V4DImode;
44284 dremap.nelt = 4;
44285 dremap.target = gen_reg_rtx (V4DImode);
44286 dremap.op0 = gen_lowpart (V4DImode, d->op0);
44287 dremap.op1 = dremap.op0;
44288 dremap.one_operand_p = true;
44289 for (i = 0; i < 2; ++i)
44291 unsigned int cnt = 0;
44292 for (j = 0; j < 4; ++j)
44293 if ((contents[i] & (1u << j)) != 0)
44294 dremap.perm[2 * i + cnt++] = j;
44295 for (; cnt < 2; ++cnt)
44296 dremap.perm[2 * i + cnt] = 0;
44299 dfinal = *d;
44300 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
44301 dfinal.op1 = dfinal.op0;
44302 dfinal.one_operand_p = true;
44303 for (i = 0, j = 0; i < nelt; ++i)
44305 if (i == nelt2)
44306 j = 2;
44307 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
44308 if ((d->perm[i] / nelt4) == dremap.perm[j])
44310 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
44311 dfinal.perm[i] |= nelt4;
44312 else
44313 gcc_unreachable ();
44316 ok = expand_vec_perm_1 (&dremap);
44317 gcc_assert (ok);
44319 ok = expand_vec_perm_1 (&dfinal);
44320 gcc_assert (ok);
44322 return true;
44325 /* A subroutine of ix86_expand_vec_perm_const_1. Try to expand
44326 a vector permutation using two instructions, vperm2f128 resp.
44327 vperm2i128 followed by any single in-lane permutation. */
44329 static bool
44330 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
44332 struct expand_vec_perm_d dfirst, dsecond;
44333 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
44334 bool ok;
44336 if (!TARGET_AVX
44337 || GET_MODE_SIZE (d->vmode) != 32
44338 || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
44339 return false;
44341 dsecond = *d;
44342 dsecond.one_operand_p = false;
44343 dsecond.testing_p = true;
44345 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
44346 immediate. For perm < 16 the second permutation uses
44347 d->op0 as first operand, for perm >= 16 it uses d->op1
44348 as first operand. The second operand is the result of
44349 vperm2[fi]128. */
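/* Worked example (for illustration only): with nelt = 4 (V4DFmode) and
   perm = 0x9, the low lane selector is perm & 3 = 1 (high lane of d->op0)
   and the high lane selector is (perm >> 2) & 3 = 2 (low lane of d->op1),
   giving the vperm2[fi]128 immediate ((0x9 << 2) | 0x9) & 0x33 = 0x21.  */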
44350 for (perm = 0; perm < 32; perm++)
44352 /* Ignore permutations which do not move anything cross-lane. */
44353 if (perm < 16)
44355 /* The second shuffle for e.g. V4DFmode has
44356 0123 and ABCD operands.
44357 Ignore AB23, as 23 is already in the second lane
44358 of the first operand. */
44359 if ((perm & 0xc) == (1 << 2)) continue;
44360 /* And 01CD, as 01 is in the first lane of the first
44361 operand. */
44362 if ((perm & 3) == 0) continue;
44363 /* And 4567, as then the vperm2[fi]128 doesn't change
44364 anything on the original 4567 second operand. */
44365 if ((perm & 0xf) == ((3 << 2) | 2)) continue;
44367 else
44369 /* The second shuffle for e.g. V4DFmode has
44370 4567 and ABCD operands.
44371 Ignore AB67, as 67 is already in the second lane
44372 of the first operand. */
44373 if ((perm & 0xc) == (3 << 2)) continue;
44374 /* And 45CD, as 45 is in the first lane of the first
44375 operand. */
44376 if ((perm & 3) == 2) continue;
44377 /* And 0123, as then the vperm2[fi]128 doesn't change
44378 anything on the original 0123 first operand. */
44379 if ((perm & 0xf) == (1 << 2)) continue;
44382 for (i = 0; i < nelt; i++)
44384 j = d->perm[i] / nelt2;
44385 if (j == ((perm >> (2 * (i >= nelt2))) & 3))
44386 dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
44387 else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
44388 dsecond.perm[i] = d->perm[i] & (nelt - 1);
44389 else
44390 break;
44393 if (i == nelt)
44395 start_sequence ();
44396 ok = expand_vec_perm_1 (&dsecond);
44397 end_sequence ();
44399 else
44400 ok = false;
44402 if (ok)
44404 if (d->testing_p)
44405 return true;
44407 /* Found a usable second shuffle. dfirst will be
44408 vperm2f128 on d->op0 and d->op1. */
44409 dsecond.testing_p = false;
44410 dfirst = *d;
44411 dfirst.target = gen_reg_rtx (d->vmode);
44412 for (i = 0; i < nelt; i++)
44413 dfirst.perm[i] = (i & (nelt2 - 1))
44414 + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
44416 ok = expand_vec_perm_1 (&dfirst);
44417 gcc_assert (ok);
44419 /* And dsecond is some single insn shuffle, taking
44420 d->op0 and result of vperm2f128 (if perm < 16) or
44421 d->op1 and result of vperm2f128 (otherwise). */
44422 dsecond.op1 = dfirst.target;
44423 if (perm >= 16)
44424 dsecond.op0 = dfirst.op1;
44426 ok = expand_vec_perm_1 (&dsecond);
44427 gcc_assert (ok);
44429 return true;
44432 /* For one operand, the only useful vperm2f128 permutation is 0x10. */
44433 if (d->one_operand_p)
44434 return false;
44437 return false;
44440 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
44441 a two vector permutation using 2 intra-lane interleave insns
44442 and cross-lane shuffle for 32-byte vectors. */
44444 static bool
44445 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
44447 unsigned i, nelt;
44448 rtx (*gen) (rtx, rtx, rtx);
44450 if (d->one_operand_p)
44451 return false;
44452 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
44454 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
44456 else
44457 return false;
44459 nelt = d->nelt;
44460 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
44461 return false;
44462 for (i = 0; i < nelt; i += 2)
44463 if (d->perm[i] != d->perm[0] + i / 2
44464 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
44465 return false;
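/* For illustration: the check above accepts e.g. the V8SImode interleave-low
   pattern {0, 8, 1, 9, 2, 10, 3, 11} (d->perm[0] == 0) and the interleave-high
   pattern {4, 12, 5, 13, 6, 14, 7, 15} (d->perm[0] == nelt / 2); anything else
   makes this matcher return false.  */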
44467 if (d->testing_p)
44468 return true;
44470 switch (d->vmode)
44472 case V32QImode:
44473 if (d->perm[0])
44474 gen = gen_vec_interleave_highv32qi;
44475 else
44476 gen = gen_vec_interleave_lowv32qi;
44477 break;
44478 case V16HImode:
44479 if (d->perm[0])
44480 gen = gen_vec_interleave_highv16hi;
44481 else
44482 gen = gen_vec_interleave_lowv16hi;
44483 break;
44484 case V8SImode:
44485 if (d->perm[0])
44486 gen = gen_vec_interleave_highv8si;
44487 else
44488 gen = gen_vec_interleave_lowv8si;
44489 break;
44490 case V4DImode:
44491 if (d->perm[0])
44492 gen = gen_vec_interleave_highv4di;
44493 else
44494 gen = gen_vec_interleave_lowv4di;
44495 break;
44496 case V8SFmode:
44497 if (d->perm[0])
44498 gen = gen_vec_interleave_highv8sf;
44499 else
44500 gen = gen_vec_interleave_lowv8sf;
44501 break;
44502 case V4DFmode:
44503 if (d->perm[0])
44504 gen = gen_vec_interleave_highv4df;
44505 else
44506 gen = gen_vec_interleave_lowv4df;
44507 break;
44508 default:
44509 gcc_unreachable ();
44512 emit_insn (gen (d->target, d->op0, d->op1));
44513 return true;
44516 /* A subroutine of ix86_expand_vec_perm_const_1. Try to implement
44517 a single vector permutation using a single intra-lane vector
44518 permutation, vperm2f128 swapping the lanes and vblend* insn blending
44519 the non-swapped and swapped vectors together. */
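/* Illustrative sketch (assumed example, not from the original source): for a
   V4DFmode one-operand permutation {0, 3, 2, 1}, dfirst becomes the in-lane
   identity {0, 1, 2, 3}, dsecond is dfirst with its lanes swapped
   ({2, 3, 0, 1}), and msk ends up as 0b1010, so the final vblendpd takes
   elements 1 and 3 from the lane-swapped copy, yielding {0, 3, 2, 1}.  */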
44521 static bool
44522 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
44524 struct expand_vec_perm_d dfirst, dsecond;
44525 unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
44526 rtx seq;
44527 bool ok;
44528 rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;
44530 if (!TARGET_AVX
44531 || TARGET_AVX2
44532 || (d->vmode != V8SFmode && d->vmode != V4DFmode)
44533 || !d->one_operand_p)
44534 return false;
44536 dfirst = *d;
44537 for (i = 0; i < nelt; i++)
44538 dfirst.perm[i] = 0xff;
44539 for (i = 0, msk = 0; i < nelt; i++)
44541 j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
44542 if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
44543 return false;
44544 dfirst.perm[j] = d->perm[i];
44545 if (j != i)
44546 msk |= (1 << i);
44548 for (i = 0; i < nelt; i++)
44549 if (dfirst.perm[i] == 0xff)
44550 dfirst.perm[i] = i;
44552 if (!d->testing_p)
44553 dfirst.target = gen_reg_rtx (dfirst.vmode);
44555 start_sequence ();
44556 ok = expand_vec_perm_1 (&dfirst);
44557 seq = get_insns ();
44558 end_sequence ();
44560 if (!ok)
44561 return false;
44563 if (d->testing_p)
44564 return true;
44566 emit_insn (seq);
44568 dsecond = *d;
44569 dsecond.op0 = dfirst.target;
44570 dsecond.op1 = dfirst.target;
44571 dsecond.one_operand_p = true;
44572 dsecond.target = gen_reg_rtx (dsecond.vmode);
44573 for (i = 0; i < nelt; i++)
44574 dsecond.perm[i] = i ^ nelt2;
44576 ok = expand_vec_perm_1 (&dsecond);
44577 gcc_assert (ok);
44579 blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
44580 emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
44581 return true;
44584 /* A subroutine of ix86_expand_vec_perm_const_1. Implement a V4DF
44585 permutation using two vperm2f128, followed by a vshufpd insn blending
44586 the two vectors together. */
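/* Worked example (illustrative only): for the V4DFmode permutation
   {1, 6, 3, 4}, dfirst selects the even-aligned pairs {0, 1, 2, 3}, dsecond
   selects {6, 7, 4, 5}, and dthird becomes {1, 4, 3, 6}, i.e. a vshufpd
   picking the odd element of each dfirst pair and the even element of each
   dsecond pair.  */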
44588 static bool
44589 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
44591 struct expand_vec_perm_d dfirst, dsecond, dthird;
44592 bool ok;
44594 if (!TARGET_AVX || (d->vmode != V4DFmode))
44595 return false;
44597 if (d->testing_p)
44598 return true;
44600 dfirst = *d;
44601 dsecond = *d;
44602 dthird = *d;
44604 dfirst.perm[0] = (d->perm[0] & ~1);
44605 dfirst.perm[1] = (d->perm[0] & ~1) + 1;
44606 dfirst.perm[2] = (d->perm[2] & ~1);
44607 dfirst.perm[3] = (d->perm[2] & ~1) + 1;
44608 dsecond.perm[0] = (d->perm[1] & ~1);
44609 dsecond.perm[1] = (d->perm[1] & ~1) + 1;
44610 dsecond.perm[2] = (d->perm[3] & ~1);
44611 dsecond.perm[3] = (d->perm[3] & ~1) + 1;
44612 dthird.perm[0] = (d->perm[0] % 2);
44613 dthird.perm[1] = (d->perm[1] % 2) + 4;
44614 dthird.perm[2] = (d->perm[2] % 2) + 2;
44615 dthird.perm[3] = (d->perm[3] % 2) + 6;
44617 dfirst.target = gen_reg_rtx (dfirst.vmode);
44618 dsecond.target = gen_reg_rtx (dsecond.vmode);
44619 dthird.op0 = dfirst.target;
44620 dthird.op1 = dsecond.target;
44621 dthird.one_operand_p = false;
44623 canonicalize_perm (&dfirst);
44624 canonicalize_perm (&dsecond);
44626 ok = expand_vec_perm_1 (&dfirst)
44627 && expand_vec_perm_1 (&dsecond)
44628 && expand_vec_perm_1 (&dthird);
44630 gcc_assert (ok);
44632 return true;
44635 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
44636 permutation with two pshufb insns and an ior. We should have already
44637 failed all two instruction sequences. */
44639 static bool
44640 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
44642 rtx rperm[2][16], vperm, l, h, op, m128;
44643 unsigned int i, nelt, eltsz;
44645 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
44646 return false;
44647 gcc_assert (!d->one_operand_p);
44649 if (d->testing_p)
44650 return true;
44652 nelt = d->nelt;
44653 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
44655 /* Generate two permutation masks. If the required element is within
44656 the given vector it is shuffled into the proper lane. If the required
44657 element is in the other vector, force a zero into the lane by setting
44658 bit 7 in the permutation mask. */
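/* For example (illustrative): selecting V16QImode element 20 into result
   byte 0 puts GEN_INT (4) into byte 0 of the op1 mask (20 - nelt = 4) and
   -128 into byte 0 of the op0 mask, so the corresponding lane of the op0
   pshufb result is zeroed and the final por keeps only the op1 byte.  */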
44659 m128 = GEN_INT (-128);
44660 for (i = 0; i < nelt; ++i)
44662 unsigned j, e = d->perm[i];
44663 unsigned which = (e >= nelt);
44664 if (e >= nelt)
44665 e -= nelt;
44667 for (j = 0; j < eltsz; ++j)
44669 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
44670 rperm[1-which][i*eltsz + j] = m128;
44674 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
44675 vperm = force_reg (V16QImode, vperm);
44677 l = gen_reg_rtx (V16QImode);
44678 op = gen_lowpart (V16QImode, d->op0);
44679 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
44681 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
44682 vperm = force_reg (V16QImode, vperm);
44684 h = gen_reg_rtx (V16QImode);
44685 op = gen_lowpart (V16QImode, d->op1);
44686 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
44688 op = d->target;
44689 if (d->vmode != V16QImode)
44690 op = gen_reg_rtx (V16QImode);
44691 emit_insn (gen_iorv16qi3 (op, l, h));
44692 if (op != d->target)
44693 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
44695 return true;
44698 /* Implement arbitrary permutation of one V32QImode or V16HImode operand
44699 with two vpshufb insns, vpermq and vpor. We should have already failed
44700 all two or three instruction sequences. */
44702 static bool
44703 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
44705 rtx rperm[2][32], vperm, l, h, hp, op, m128;
44706 unsigned int i, nelt, eltsz;
44708 if (!TARGET_AVX2
44709 || !d->one_operand_p
44710 || (d->vmode != V32QImode && d->vmode != V16HImode))
44711 return false;
44713 if (d->testing_p)
44714 return true;
44716 nelt = d->nelt;
44717 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
44719 /* Generate two permutation masks. If the required element is within
44720 the same lane, it is shuffled in. If the required element is from the
44721 other lane, force a zero by setting bit 7 in the permutation mask.
44722 The other mask has a non-negative element wherever an element is
44723 requested from the other lane, but that element is also moved to the
44724 other lane, so that the result of vpshufb can have its two V2TImode
44725 halves swapped. */
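/* In other words (illustrative summary): rperm[0] feeds the vpshufb whose
   result is used directly and keeps the elements that already sit in the
   correct 128-bit lane, while rperm[1] collects the cross-lane elements at
   lane-swapped positions; the vpermq below then swaps the two lanes of that
   second result so the final vpor puts every element in place.  */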
44726 m128 = GEN_INT (-128);
44727 for (i = 0; i < nelt; ++i)
44729 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
44730 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
44732 for (j = 0; j < eltsz; ++j)
44734 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
44735 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
44739 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
44740 vperm = force_reg (V32QImode, vperm);
44742 h = gen_reg_rtx (V32QImode);
44743 op = gen_lowpart (V32QImode, d->op0);
44744 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
44746 /* Swap the 128-bit lanes of h into hp. */
44747 hp = gen_reg_rtx (V4DImode);
44748 op = gen_lowpart (V4DImode, h);
44749 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
44750 const1_rtx));
44752 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
44753 vperm = force_reg (V32QImode, vperm);
44755 l = gen_reg_rtx (V32QImode);
44756 op = gen_lowpart (V32QImode, d->op0);
44757 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
44759 op = d->target;
44760 if (d->vmode != V32QImode)
44761 op = gen_reg_rtx (V32QImode);
44762 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
44763 if (op != d->target)
44764 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
44766 return true;
44769 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
44770 and extract-odd permutations of two V32QImode or V16HImode operands
44771 with two vpshufb insns, vpor and vpermq. We should have already
44772 failed all two or three instruction sequences. */
44774 static bool
44775 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
44777 rtx rperm[2][32], vperm, l, h, ior, op, m128;
44778 unsigned int i, nelt, eltsz;
44780 if (!TARGET_AVX2
44781 || d->one_operand_p
44782 || (d->vmode != V32QImode && d->vmode != V16HImode))
44783 return false;
44785 for (i = 0; i < d->nelt; ++i)
44786 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
44787 return false;
44789 if (d->testing_p)
44790 return true;
44792 nelt = d->nelt;
44793 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
44795 /* Generate two permutation masks. In the first permutation mask
44796 the first quarter will contain indexes for the first half
44797 of the op0, the second quarter will contain bit 7 set, third quarter
44798 will contain indexes for the second half of the op0 and the
44799 last quarter bit 7 set. In the second permutation mask
44800 the first quarter will contain bit 7 set, the second quarter
44801 indexes for the first half of the op1, the third quarter bit 7 set
44802 and last quarter indexes for the second half of the op1.
44803 I.e. the first mask e.g. for V32QImode extract even will be:
44804 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
44805 (all values masked with 0xf except for -128) and second mask
44806 for extract even will be
44807 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
44808 m128 = GEN_INT (-128);
44809 for (i = 0; i < nelt; ++i)
44811 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
44812 unsigned which = d->perm[i] >= nelt;
44813 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
44815 for (j = 0; j < eltsz; ++j)
44817 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
44818 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
44822 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
44823 vperm = force_reg (V32QImode, vperm);
44825 l = gen_reg_rtx (V32QImode);
44826 op = gen_lowpart (V32QImode, d->op0);
44827 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
44829 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
44830 vperm = force_reg (V32QImode, vperm);
44832 h = gen_reg_rtx (V32QImode);
44833 op = gen_lowpart (V32QImode, d->op1);
44834 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
44836 ior = gen_reg_rtx (V32QImode);
44837 emit_insn (gen_iorv32qi3 (ior, l, h));
44839 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
44840 op = gen_reg_rtx (V4DImode);
44841 ior = gen_lowpart (V4DImode, ior);
44842 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
44843 const1_rtx, GEN_INT (3)));
44844 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
44846 return true;
44849 /* A subroutine of ix86_expand_vec_perm_const_1. Implement extract-even
44850 and extract-odd permutations. */
44852 static bool
44853 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
44855 rtx t1, t2, t3, t4, t5;
44857 switch (d->vmode)
44859 case V4DFmode:
44860 if (d->testing_p)
44861 break;
44862 t1 = gen_reg_rtx (V4DFmode);
44863 t2 = gen_reg_rtx (V4DFmode);
44865 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
44866 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
44867 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
44869 /* Now an unpck[lh]pd will produce the result required. */
44870 if (odd)
44871 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
44872 else
44873 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
44874 emit_insn (t3);
44875 break;
44877 case V8SFmode:
44879 int mask = odd ? 0xdd : 0x88;
44881 if (d->testing_p)
44882 break;
44883 t1 = gen_reg_rtx (V8SFmode);
44884 t2 = gen_reg_rtx (V8SFmode);
44885 t3 = gen_reg_rtx (V8SFmode);
44887 /* Shuffle within the 128-bit lanes to produce:
44888 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
44889 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
44890 GEN_INT (mask)));
44892 /* Shuffle the lanes around to produce:
44893 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
44894 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
44895 GEN_INT (0x3)));
44897 /* Shuffle within the 128-bit lanes to produce:
44898 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
44899 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
44901 /* Shuffle within the 128-bit lanes to produce:
44902 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
44903 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
44905 /* Shuffle the lanes around to produce:
44906 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
44907 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
44908 GEN_INT (0x20)));
44910 break;
44912 case V2DFmode:
44913 case V4SFmode:
44914 case V2DImode:
44915 case V4SImode:
44916 /* These are always directly implementable by expand_vec_perm_1. */
44917 gcc_unreachable ();
44919 case V8HImode:
44920 if (TARGET_SSSE3)
44921 return expand_vec_perm_pshufb2 (d);
44922 else
44924 if (d->testing_p)
44925 break;
44926 /* We need 2*log2(N)-1 operations to achieve odd/even
44927 with interleave. */
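/* Illustrative trace (not from the original source), writing op0 as
   elements 0-7 and op1 as elements 8-15: after the first pair of
   interleaves the target holds {0,8,1,9,2,10,3,11} and t1 holds
   {4,12,5,13,6,14,7,15}; after the second pair the target holds
   {0,4,8,12,1,5,9,13} and t2 holds {2,6,10,14,3,7,11,15}; the final
   interleave low/high then yields {0,2,4,6,8,10,12,14} or
   {1,3,5,7,9,11,13,15}.  */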
44928 t1 = gen_reg_rtx (V8HImode);
44929 t2 = gen_reg_rtx (V8HImode);
44930 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
44931 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
44932 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
44933 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
44934 if (odd)
44935 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
44936 else
44937 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
44938 emit_insn (t3);
44940 break;
44942 case V16QImode:
44943 if (TARGET_SSSE3)
44944 return expand_vec_perm_pshufb2 (d);
44945 else
44947 if (d->testing_p)
44948 break;
44949 t1 = gen_reg_rtx (V16QImode);
44950 t2 = gen_reg_rtx (V16QImode);
44951 t3 = gen_reg_rtx (V16QImode);
44952 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
44953 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
44954 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
44955 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
44956 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
44957 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
44958 if (odd)
44959 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
44960 else
44961 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
44962 emit_insn (t3);
44964 break;
44966 case V16HImode:
44967 case V32QImode:
44968 return expand_vec_perm_vpshufb2_vpermq_even_odd (d);
44970 case V4DImode:
44971 if (!TARGET_AVX2)
44973 struct expand_vec_perm_d d_copy = *d;
44974 d_copy.vmode = V4DFmode;
44975 if (d->testing_p)
44976 d_copy.target = gen_lowpart (V4DFmode, d->target);
44977 else
44978 d_copy.target = gen_reg_rtx (V4DFmode);
44979 d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
44980 d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
44981 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
44983 if (!d->testing_p)
44984 emit_move_insn (d->target,
44985 gen_lowpart (V4DImode, d_copy.target));
44986 return true;
44988 return false;
44991 if (d->testing_p)
44992 break;
44994 t1 = gen_reg_rtx (V4DImode);
44995 t2 = gen_reg_rtx (V4DImode);
44997 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
44998 emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
44999 emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
45001 /* Now a vpunpck[lh]qdq will produce the result required. */
45002 if (odd)
45003 t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
45004 else
45005 t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
45006 emit_insn (t3);
45007 break;
45009 case V8SImode:
45010 if (!TARGET_AVX2)
45012 struct expand_vec_perm_d d_copy = *d;
45013 d_copy.vmode = V8SFmode;
45014 if (d->testing_p)
45015 d_copy.target = gen_lowpart (V8SFmode, d->target);
45016 else
45017 d_copy.target = gen_reg_rtx (V8SFmode);
45018 d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
45019 d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
45020 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
45022 if (!d->testing_p)
45023 emit_move_insn (d->target,
45024 gen_lowpart (V8SImode, d_copy.target));
45025 return true;
45027 return false;
45030 if (d->testing_p)
45031 break;
45033 t1 = gen_reg_rtx (V8SImode);
45034 t2 = gen_reg_rtx (V8SImode);
45035 t3 = gen_reg_rtx (V4DImode);
45036 t4 = gen_reg_rtx (V4DImode);
45037 t5 = gen_reg_rtx (V4DImode);
45039 /* Shuffle the lanes around into
45040 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
45041 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0),
45042 gen_lowpart (V4DImode, d->op1),
45043 GEN_INT (0x20)));
45044 emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0),
45045 gen_lowpart (V4DImode, d->op1),
45046 GEN_INT (0x31)));
45048 /* Swap the 2nd and 3rd position in each lane into
45049 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
45050 emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3),
45051 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
45052 emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4),
45053 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
45055 /* Now a vpunpck[lh]qdq will produce
45056 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
45057 if (odd)
45058 t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1),
45059 gen_lowpart (V4DImode, t2));
45060 else
45061 t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1),
45062 gen_lowpart (V4DImode, t2));
45063 emit_insn (t3);
45064 emit_move_insn (d->target, gen_lowpart (V8SImode, t5));
45065 break;
45067 default:
45068 gcc_unreachable ();
45071 return true;
45074 /* A subroutine of ix86_expand_vec_perm_const_1. Pattern match
45075 extract-even and extract-odd permutations. */
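/* For illustration: the V8SImode extract-even selector is
   {0, 2, 4, 6, 8, 10, 12, 14} and the extract-odd selector is
   {1, 3, 5, 7, 9, 11, 13, 15}; anything else makes this matcher
   return false.  */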
45077 static bool
45078 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
45080 unsigned i, odd, nelt = d->nelt;
45082 odd = d->perm[0];
45083 if (odd != 0 && odd != 1)
45084 return false;
45086 for (i = 1; i < nelt; ++i)
45087 if (d->perm[i] != 2 * i + odd)
45088 return false;
45090 return expand_vec_perm_even_odd_1 (d, odd);
45093 /* A subroutine of ix86_expand_vec_perm_const_1. Implement broadcast
45094 permutations. We assume that expand_vec_perm_1 has already failed. */
45096 static bool
45097 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
45099 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
45100 enum machine_mode vmode = d->vmode;
45101 unsigned char perm2[4];
45102 rtx op0 = d->op0, dest;
45103 bool ok;
45105 switch (vmode)
45107 case V4DFmode:
45108 case V8SFmode:
45109 /* These are special-cased in sse.md so that we can optionally
45110 use the vbroadcast instruction. They expand to two insns
45111 if the input happens to be in a register. */
45112 gcc_unreachable ();
45114 case V2DFmode:
45115 case V2DImode:
45116 case V4SFmode:
45117 case V4SImode:
45118 /* These are always implementable using standard shuffle patterns. */
45119 gcc_unreachable ();
45121 case V8HImode:
45122 case V16QImode:
45123 /* These can be implemented via interleave. We save one insn by
45124 stopping once we have promoted to V4SImode and then use pshufd. */
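/* Illustrative trace (assumed example): broadcasting element 5 of a
   V8HImode vector uses one interleave-high (element 5 becomes element 1 of
   the widened view), after which the vector is viewed as V4SImode and a
   single pshufd with selector {1,1,1,1} replicates the 32-bit pair holding
   the value into every lane.  */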
45125 if (d->testing_p)
45126 return true;
45129 rtx dest;
45130 rtx (*gen) (rtx, rtx, rtx)
45131 = vmode == V16QImode ? gen_vec_interleave_lowv16qi
45132 : gen_vec_interleave_lowv8hi;
45134 if (elt >= nelt2)
45136 gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
45137 : gen_vec_interleave_highv8hi;
45138 elt -= nelt2;
45140 nelt2 /= 2;
45142 dest = gen_reg_rtx (vmode);
45143 emit_insn (gen (dest, op0, op0));
45144 vmode = get_mode_wider_vector (vmode);
45145 op0 = gen_lowpart (vmode, dest);
45147 while (vmode != V4SImode);
45149 memset (perm2, elt, 4);
45150 dest = gen_reg_rtx (V4SImode);
45151 ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
45152 gcc_assert (ok);
45153 if (!d->testing_p)
45154 emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
45155 return true;
45157 case V32QImode:
45158 case V16HImode:
45159 case V8SImode:
45160 case V4DImode:
45161 /* For AVX2 broadcasts of the first element vpbroadcast* or
45162 vpermq should be used by expand_vec_perm_1. */
45163 gcc_assert (!TARGET_AVX2 || d->perm[0]);
45164 return false;
45166 default:
45167 gcc_unreachable ();
45171 /* A subroutine of ix86_expand_vec_perm_const_1. Pattern match
45172 broadcast permutations. */
45174 static bool
45175 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
45177 unsigned i, elt, nelt = d->nelt;
45179 if (!d->one_operand_p)
45180 return false;
45182 elt = d->perm[0];
45183 for (i = 1; i < nelt; ++i)
45184 if (d->perm[i] != elt)
45185 return false;
45187 return expand_vec_perm_broadcast_1 (d);
45190 /* Implement arbitrary permutation of two V32QImode or V16HImode operands
45191 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
45192 all the shorter instruction sequences. */
45194 static bool
45195 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
45197 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
45198 unsigned int i, nelt, eltsz;
45199 bool used[4];
45201 if (!TARGET_AVX2
45202 || d->one_operand_p
45203 || (d->vmode != V32QImode && d->vmode != V16HImode))
45204 return false;
45206 if (d->testing_p)
45207 return true;
45209 nelt = d->nelt;
45210 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
45212 /* Generate 4 permutation masks. If the required element is within
45213 the same lane, it is shuffled in. If the required element is from the
45214 other lane, force a zero by setting bit 7 in the permutation mask.
45215 The other masks have a non-negative element wherever an element is
45216 requested from the other lane, but that element is also moved to the
45217 other lane, so that the result of vpshufb can have its two V2TImode
45218 halves swapped. */
45219 m128 = GEN_INT (-128);
45220 for (i = 0; i < 32; ++i)
45222 rperm[0][i] = m128;
45223 rperm[1][i] = m128;
45224 rperm[2][i] = m128;
45225 rperm[3][i] = m128;
45227 used[0] = false;
45228 used[1] = false;
45229 used[2] = false;
45230 used[3] = false;
45231 for (i = 0; i < nelt; ++i)
45233 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
45234 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
45235 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
45237 for (j = 0; j < eltsz; ++j)
45238 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
45239 used[which] = true;
45242 for (i = 0; i < 2; ++i)
45244 if (!used[2 * i + 1])
45246 h[i] = NULL_RTX;
45247 continue;
45249 vperm = gen_rtx_CONST_VECTOR (V32QImode,
45250 gen_rtvec_v (32, rperm[2 * i + 1]));
45251 vperm = force_reg (V32QImode, vperm);
45252 h[i] = gen_reg_rtx (V32QImode);
45253 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
45254 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
45257 /* Swap the 128-bit lanes of h[X]. */
45258 for (i = 0; i < 2; ++i)
45260 if (h[i] == NULL_RTX)
45261 continue;
45262 op = gen_reg_rtx (V4DImode);
45263 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
45264 const2_rtx, GEN_INT (3), const0_rtx,
45265 const1_rtx));
45266 h[i] = gen_lowpart (V32QImode, op);
45269 for (i = 0; i < 2; ++i)
45271 if (!used[2 * i])
45273 l[i] = NULL_RTX;
45274 continue;
45276 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
45277 vperm = force_reg (V32QImode, vperm);
45278 l[i] = gen_reg_rtx (V32QImode);
45279 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
45280 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
45283 for (i = 0; i < 2; ++i)
45285 if (h[i] && l[i])
45287 op = gen_reg_rtx (V32QImode);
45288 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
45289 l[i] = op;
45291 else if (h[i])
45292 l[i] = h[i];
45295 gcc_assert (l[0] && l[1]);
45296 op = d->target;
45297 if (d->vmode != V32QImode)
45298 op = gen_reg_rtx (V32QImode);
45299 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
45300 if (op != d->target)
45301 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
45302 return true;
45305 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
45306 With all of the interface bits taken care of, perform the expansion
45307 in D and return true on success. */
45309 static bool
45310 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
45312 /* Try a single instruction expansion. */
45313 if (expand_vec_perm_1 (d))
45314 return true;
45316 /* Try sequences of two instructions. */
45318 if (expand_vec_perm_pshuflw_pshufhw (d))
45319 return true;
45321 if (expand_vec_perm_palignr (d))
45322 return true;
45324 if (expand_vec_perm_interleave2 (d))
45325 return true;
45327 if (expand_vec_perm_broadcast (d))
45328 return true;
45330 if (expand_vec_perm_vpermq_perm_1 (d))
45331 return true;
45333 if (expand_vec_perm_vperm2f128 (d))
45334 return true;
45336 /* Try sequences of three instructions. */
45338 if (expand_vec_perm_2vperm2f128_vshuf (d))
45339 return true;
45341 if (expand_vec_perm_pshufb2 (d))
45342 return true;
45344 if (expand_vec_perm_interleave3 (d))
45345 return true;
45347 if (expand_vec_perm_vperm2f128_vblend (d))
45348 return true;
45350 /* Try sequences of four instructions. */
45352 if (expand_vec_perm_vpshufb2_vpermq (d))
45353 return true;
45355 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
45356 return true;
45358 /* ??? Look for narrow permutations whose element orderings would
45359 allow the promotion to a wider mode. */
45361 /* ??? Look for sequences of interleave or a wider permute that place
45362 the data into the correct lanes for a half-vector shuffle like
45363 pshuf[lh]w or vpermilps. */
45365 /* ??? Look for sequences of interleave that produce the desired results.
45366 The combinatorics of punpck[lh] get pretty ugly... */
45368 if (expand_vec_perm_even_odd (d))
45369 return true;
45371 /* Even longer sequences. */
45372 if (expand_vec_perm_vpshufb4_vpermq2 (d))
45373 return true;
45375 return false;
45378 /* If a permutation only uses one operand, make it clear. Returns true
45379 if the permutation references both operands. */
45381 static bool
45382 canonicalize_perm (struct expand_vec_perm_d *d)
45384 int i, which, nelt = d->nelt;
45386 for (i = which = 0; i < nelt; ++i)
45387 which |= (d->perm[i] < nelt ? 1 : 2);
45389 d->one_operand_p = true;
45390 switch (which)
45392 default:
45393 gcc_unreachable();
45395 case 3:
45396 if (!rtx_equal_p (d->op0, d->op1))
45398 d->one_operand_p = false;
45399 break;
45401 /* The elements of PERM do not suggest that only the first operand
45402 is used, but both operands are identical. Allow easier matching
45403 of the permutation by folding the permutation into the single
45404 input vector. */
45405 /* FALLTHRU */
45407 case 2:
45408 for (i = 0; i < nelt; ++i)
45409 d->perm[i] &= nelt - 1;
45410 d->op0 = d->op1;
45411 break;
45413 case 1:
45414 d->op1 = d->op0;
45415 break;
45418 return (which == 3);
45421 bool
45422 ix86_expand_vec_perm_const (rtx operands[4])
45424 struct expand_vec_perm_d d;
45425 unsigned char perm[MAX_VECT_LEN];
45426 int i, nelt;
45427 bool two_args;
45428 rtx sel;
45430 d.target = operands[0];
45431 d.op0 = operands[1];
45432 d.op1 = operands[2];
45433 sel = operands[3];
45435 d.vmode = GET_MODE (d.target);
45436 gcc_assert (VECTOR_MODE_P (d.vmode));
45437 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
45438 d.testing_p = false;
45440 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
45441 gcc_assert (XVECLEN (sel, 0) == nelt);
45442 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
45444 for (i = 0; i < nelt; ++i)
45446 rtx e = XVECEXP (sel, 0, i);
45447 int ei = INTVAL (e) & (2 * nelt - 1);
45448 d.perm[i] = ei;
45449 perm[i] = ei;
45452 two_args = canonicalize_perm (&d);
45454 if (ix86_expand_vec_perm_const_1 (&d))
45455 return true;
45457 /* If the selector says both arguments are needed, but the operands are the
45458 same, the above tried to expand with one_operand_p and flattened selector.
45459 If that didn't work, retry without one_operand_p; we succeeded with that
45460 during testing. */
45461 if (two_args && d.one_operand_p)
45463 d.one_operand_p = false;
45464 memcpy (d.perm, perm, sizeof (perm));
45465 return ix86_expand_vec_perm_const_1 (&d);
45468 return false;
45471 /* Implement targetm.vectorize.vec_perm_const_ok. */
45473 static bool
45474 ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode,
45475 const unsigned char *sel)
45477 struct expand_vec_perm_d d;
45478 unsigned int i, nelt, which;
45479 bool ret;
45481 d.vmode = vmode;
45482 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
45483 d.testing_p = true;
45485 /* Given sufficient ISA support we can just return true here
45486 for selected vector modes. */
45487 if (d.vmode == V16SImode || d.vmode == V16SFmode
45488 || d.vmode == V8DFmode || d.vmode == V8DImode)
45489 /* All implementable with a single vpermi2 insn. */
45490 return true;
45491 if (GET_MODE_SIZE (d.vmode) == 16)
45493 /* All implementable with a single vpperm insn. */
45494 if (TARGET_XOP)
45495 return true;
45496 /* All implementable with 2 pshufb + 1 ior. */
45497 if (TARGET_SSSE3)
45498 return true;
45499 /* All implementable with shufpd or unpck[lh]pd. */
45500 if (d.nelt == 2)
45501 return true;
45504 /* Extract the values from the vector CST into the permutation
45505 array in D. */
45506 memcpy (d.perm, sel, nelt);
45507 for (i = which = 0; i < nelt; ++i)
45509 unsigned char e = d.perm[i];
45510 gcc_assert (e < 2 * nelt);
45511 which |= (e < nelt ? 1 : 2);
45514 /* For all elements from second vector, fold the elements to first. */
45515 if (which == 2)
45516 for (i = 0; i < nelt; ++i)
45517 d.perm[i] -= nelt;
45519 /* Check whether the mask can be applied to the vector type. */
45520 d.one_operand_p = (which != 3);
45522 /* Implementable with shufps or pshufd. */
45523 if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
45524 return true;
45526 /* Otherwise we have to go through the motions and see if we can
45527 figure out how to generate the requested permutation. */
45528 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
45529 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
45530 if (!d.one_operand_p)
45531 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
45533 start_sequence ();
45534 ret = ix86_expand_vec_perm_const_1 (&d);
45535 end_sequence ();
45537 return ret;
45540 void
45541 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
45543 struct expand_vec_perm_d d;
45544 unsigned i, nelt;
45546 d.target = targ;
45547 d.op0 = op0;
45548 d.op1 = op1;
45549 d.vmode = GET_MODE (targ);
45550 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
45551 d.one_operand_p = false;
45552 d.testing_p = false;
45554 for (i = 0; i < nelt; ++i)
45555 d.perm[i] = i * 2 + odd;
45557 /* We'll either be able to implement the permutation directly... */
45558 if (expand_vec_perm_1 (&d))
45559 return;
45561 /* ... or we use the special-case patterns. */
45562 expand_vec_perm_even_odd_1 (&d, odd);
45565 static void
45566 ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
45568 struct expand_vec_perm_d d;
45569 unsigned i, nelt, base;
45570 bool ok;
45572 d.target = targ;
45573 d.op0 = op0;
45574 d.op1 = op1;
45575 d.vmode = GET_MODE (targ);
45576 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
45577 d.one_operand_p = false;
45578 d.testing_p = false;
45580 base = high_p ? nelt / 2 : 0;
45581 for (i = 0; i < nelt / 2; ++i)
45583 d.perm[i * 2] = i + base;
45584 d.perm[i * 2 + 1] = i + base + nelt;
45587 /* Note that for AVX this isn't one instruction. */
45588 ok = ix86_expand_vec_perm_const_1 (&d);
45589 gcc_assert (ok);
45593 /* Expand a vector operation CODE for a V*QImode in terms of the
45594 same operation on V*HImode. */
45596 void
45597 ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
45599 enum machine_mode qimode = GET_MODE (dest);
45600 enum machine_mode himode;
45601 rtx (*gen_il) (rtx, rtx, rtx);
45602 rtx (*gen_ih) (rtx, rtx, rtx);
45603 rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
45604 struct expand_vec_perm_d d;
45605 bool ok, full_interleave;
45606 bool uns_p = false;
45607 int i;
45609 switch (qimode)
45611 case V16QImode:
45612 himode = V8HImode;
45613 gen_il = gen_vec_interleave_lowv16qi;
45614 gen_ih = gen_vec_interleave_highv16qi;
45615 break;
45616 case V32QImode:
45617 himode = V16HImode;
45618 gen_il = gen_avx2_interleave_lowv32qi;
45619 gen_ih = gen_avx2_interleave_highv32qi;
45620 break;
45621 default:
45622 gcc_unreachable ();
45625 op2_l = op2_h = op2;
45626 switch (code)
45628 case MULT:
45629 /* Unpack data such that we've got a source byte in each low byte of
45630 each word. We don't care what goes into the high byte of each word.
45631 Rather than trying to get zero in there, most convenient is to let
45632 it be a copy of the low byte. */
45633 op2_l = gen_reg_rtx (qimode);
45634 op2_h = gen_reg_rtx (qimode);
45635 emit_insn (gen_il (op2_l, op2, op2));
45636 emit_insn (gen_ih (op2_h, op2, op2));
45637 /* FALLTHRU */
45639 op1_l = gen_reg_rtx (qimode);
45640 op1_h = gen_reg_rtx (qimode);
45641 emit_insn (gen_il (op1_l, op1, op1));
45642 emit_insn (gen_ih (op1_h, op1, op1));
45643 full_interleave = qimode == V16QImode;
45644 break;
45646 case ASHIFT:
45647 case LSHIFTRT:
45648 uns_p = true;
45649 /* FALLTHRU */
45650 case ASHIFTRT:
45651 op1_l = gen_reg_rtx (himode);
45652 op1_h = gen_reg_rtx (himode);
45653 ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
45654 ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
45655 full_interleave = true;
45656 break;
45657 default:
45658 gcc_unreachable ();
45661 /* Perform the operation. */
45662 res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
45663 1, OPTAB_DIRECT);
45664 res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
45665 1, OPTAB_DIRECT);
45666 gcc_assert (res_l && res_h);
45668 /* Merge the data back into the right place. */
45669 d.target = dest;
45670 d.op0 = gen_lowpart (qimode, res_l);
45671 d.op1 = gen_lowpart (qimode, res_h);
45672 d.vmode = qimode;
45673 d.nelt = GET_MODE_NUNITS (qimode);
45674 d.one_operand_p = false;
45675 d.testing_p = false;
45677 if (full_interleave)
45679 /* For SSE2, we used a full interleave, so the desired
45680 results are in the even elements. */
45681 for (i = 0; i < 32; ++i)
45682 d.perm[i] = i * 2;
45684 else
45686 /* For AVX, the interleave used above was not cross-lane. So the
45687 extraction is evens but with the second and third quarter swapped.
45688 Happily, that is even one insn shorter than even extraction. */
45689 for (i = 0; i < 32; ++i)
45690 d.perm[i] = i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0);
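/* For illustration, that makes d.perm equal to
   { 0, 2, ..., 14, 32, 34, ..., 46, 16, 18, ..., 30, 48, 50, ..., 62 }:
   even elements, but with the second and third groups of eight taken from
   the other vector, which undoes the in-lane interleave used above.  */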
45693 ok = ix86_expand_vec_perm_const_1 (&d);
45694 gcc_assert (ok);
45696 set_unique_reg_note (get_last_insn (), REG_EQUAL,
45697 gen_rtx_fmt_ee (code, qimode, op1, op2));
45700 /* Helper function of ix86_expand_mul_widen_evenodd. Return true
45701 if op is CONST_VECTOR with all odd elements equal to their
45702 preceding element. */
45704 static bool
45705 const_vector_equal_evenodd_p (rtx op)
45707 enum machine_mode mode = GET_MODE (op);
45708 int i, nunits = GET_MODE_NUNITS (mode);
45709 if (GET_CODE (op) != CONST_VECTOR
45710 || nunits != CONST_VECTOR_NUNITS (op))
45711 return false;
45712 for (i = 0; i < nunits; i += 2)
45713 if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
45714 return false;
45715 return true;
45718 void
45719 ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
45720 bool uns_p, bool odd_p)
45722 enum machine_mode mode = GET_MODE (op1);
45723 enum machine_mode wmode = GET_MODE (dest);
45724 rtx x;
45725 rtx orig_op1 = op1, orig_op2 = op2;
45727 if (!nonimmediate_operand (op1, mode))
45728 op1 = force_reg (mode, op1);
45729 if (!nonimmediate_operand (op2, mode))
45730 op2 = force_reg (mode, op2);
45732 /* We only play even/odd games with vectors of SImode. */
45733 gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode);
45735 /* If we're looking for the odd results, shift those members down to
45736 the even slots. For some cpus this is faster than a PSHUFD. */
45737 if (odd_p)
45739 /* For XOP use vpmacsdqh, but only for smult, as it is only
45740 signed. */
45741 if (TARGET_XOP && mode == V4SImode && !uns_p)
45743 x = force_reg (wmode, CONST0_RTX (wmode));
45744 emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
45745 return;
45748 x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
45749 if (!const_vector_equal_evenodd_p (orig_op1))
45750 op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
45751 x, NULL, 1, OPTAB_DIRECT);
45752 if (!const_vector_equal_evenodd_p (orig_op2))
45753 op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
45754 x, NULL, 1, OPTAB_DIRECT);
45755 op1 = gen_lowpart (mode, op1);
45756 op2 = gen_lowpart (mode, op2);
45759 if (mode == V16SImode)
45761 if (uns_p)
45762 x = gen_vec_widen_umult_even_v16si (dest, op1, op2);
45763 else
45764 x = gen_vec_widen_smult_even_v16si (dest, op1, op2);
45766 else if (mode == V8SImode)
45768 if (uns_p)
45769 x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
45770 else
45771 x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
45773 else if (uns_p)
45774 x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
45775 else if (TARGET_SSE4_1)
45776 x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
45777 else
45779 rtx s1, s2, t0, t1, t2;
45781 /* The easiest way to implement this without PMULDQ is to go through
45782 the motions as if we are performing a full 64-bit multiply, except
45783 that we need to do less shuffling of the elements. */
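/* Sketch of the identity used (illustrative): with n1 = (op1 < 0) and
   n2 = (op2 < 0) as 0/-1 masks, the signed product modulo 2^64 equals
   lo(op1)*lo(op2) + ((n1*lo(op2) + n2*lo(op1)) << 32), which is what the
   three widening unsigned multiplies and the final shift/add compute.  */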
45785 /* Compute the sign-extension, aka highparts, of the two operands. */
45786 s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
45787 op1, pc_rtx, pc_rtx);
45788 s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
45789 op2, pc_rtx, pc_rtx);
45791 /* Multiply LO(A) * HI(B), and vice-versa. */
45792 t1 = gen_reg_rtx (wmode);
45793 t2 = gen_reg_rtx (wmode);
45794 emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
45795 emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));
45797 /* Multiply LO(A) * LO(B). */
45798 t0 = gen_reg_rtx (wmode);
45799 emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));
45801 /* Combine and shift the highparts into place. */
45802 t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
45803 t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
45804 1, OPTAB_DIRECT);
45806 /* Combine high and low parts. */
45807 force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
45808 return;
45810 emit_insn (x);
45813 void
45814 ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
45815 bool uns_p, bool high_p)
45817 enum machine_mode wmode = GET_MODE (dest);
45818 enum machine_mode mode = GET_MODE (op1);
45819 rtx t1, t2, t3, t4, mask;
45821 switch (mode)
45823 case V4SImode:
45824 t1 = gen_reg_rtx (mode);
45825 t2 = gen_reg_rtx (mode);
45826 if (TARGET_XOP && !uns_p)
45828 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
45829 shuffle the elements once so that all elements are in the right
45830 place for immediate use: { A C B D }. */
45831 emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
45832 const1_rtx, GEN_INT (3)));
45833 emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
45834 const1_rtx, GEN_INT (3)));
45836 else
45838 /* Put the elements into place for the multiply. */
45839 ix86_expand_vec_interleave (t1, op1, op1, high_p);
45840 ix86_expand_vec_interleave (t2, op2, op2, high_p);
45841 high_p = false;
45843 ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
45844 break;
45846 case V8SImode:
45847 /* Shuffle the elements between the lanes. After this we
45848 have { A B E F | C D G H } for each operand. */
45849 t1 = gen_reg_rtx (V4DImode);
45850 t2 = gen_reg_rtx (V4DImode);
45851 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
45852 const0_rtx, const2_rtx,
45853 const1_rtx, GEN_INT (3)));
45854 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
45855 const0_rtx, const2_rtx,
45856 const1_rtx, GEN_INT (3)));
45858 /* Shuffle the elements within the lanes. After this we
45859 have { A A B B | C C D D } or { E E F F | G G H H }. */
45860 t3 = gen_reg_rtx (V8SImode);
45861 t4 = gen_reg_rtx (V8SImode);
45862 mask = GEN_INT (high_p
45863 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
45864 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
45865 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
45866 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));
45868 ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
45869 break;
45871 case V8HImode:
45872 case V16HImode:
45873 t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
45874 uns_p, OPTAB_DIRECT);
45875 t2 = expand_binop (mode,
45876 uns_p ? umul_highpart_optab : smul_highpart_optab,
45877 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
45878 gcc_assert (t1 && t2);
45880 t3 = gen_reg_rtx (mode);
45881 ix86_expand_vec_interleave (t3, t1, t2, high_p);
45882 emit_move_insn (dest, gen_lowpart (wmode, t3));
45883 break;
45885 case V16QImode:
45886 case V32QImode:
45887 t1 = gen_reg_rtx (wmode);
45888 t2 = gen_reg_rtx (wmode);
45889 ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
45890 ix86_expand_sse_unpack (t2, op2, uns_p, high_p);
45892 emit_insn (gen_rtx_SET (VOIDmode, dest, gen_rtx_MULT (wmode, t1, t2)));
45893 break;
45895 default:
45896 gcc_unreachable ();
45900 void
45901 ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
45903 rtx res_1, res_2, res_3, res_4;
45905 res_1 = gen_reg_rtx (V4SImode);
45906 res_2 = gen_reg_rtx (V4SImode);
45907 res_3 = gen_reg_rtx (V2DImode);
45908 res_4 = gen_reg_rtx (V2DImode);
45909 ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
45910 ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);
45912 /* Move the results in element 2 down to element 1; we don't care
45913 what goes in elements 2 and 3. Then we can merge the parts
45914 back together with an interleave.
45916 Note that two other sequences were tried:
45917 (1) Use interleaves at the start instead of psrldq, which allows
45918 us to use a single shufps to merge things back at the end.
45919 (2) Use shufps here to combine the two vectors, then pshufd to
45920 put the elements in the correct order.
45921 In both cases the cost of the reformatting stall was too high
45922 and the overall sequence slower. */
45924 emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
45925 const0_rtx, const2_rtx,
45926 const0_rtx, const0_rtx));
45927 emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
45928 const0_rtx, const2_rtx,
45929 const0_rtx, const0_rtx));
45930 res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));
45932 set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
45935 void
45936 ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
45938 enum machine_mode mode = GET_MODE (op0);
45939 rtx t1, t2, t3, t4, t5, t6;
45941 if (TARGET_XOP && mode == V2DImode)
45943 /* op1: A,B,C,D, op2: E,F,G,H */
45944 op1 = gen_lowpart (V4SImode, op1);
45945 op2 = gen_lowpart (V4SImode, op2);
45947 t1 = gen_reg_rtx (V4SImode);
45948 t2 = gen_reg_rtx (V4SImode);
45949 t3 = gen_reg_rtx (V2DImode);
45950 t4 = gen_reg_rtx (V2DImode);
45952 /* t1: B,A,D,C */
45953 emit_insn (gen_sse2_pshufd_1 (t1, op1,
45954 GEN_INT (1),
45955 GEN_INT (0),
45956 GEN_INT (3),
45957 GEN_INT (2)));
45959 /* t2: (B*E),(A*F),(D*G),(C*H) */
45960 emit_insn (gen_mulv4si3 (t2, t1, op2));
45962 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
45963 emit_insn (gen_xop_phadddq (t3, t2));
45965 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
45966 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
45968 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
45969 emit_insn (gen_xop_pmacsdql (op0, op1, op2, t4));
45971 else
45973 enum machine_mode nmode;
45974 rtx (*umul) (rtx, rtx, rtx);
45976 if (mode == V2DImode)
45978 umul = gen_vec_widen_umult_even_v4si;
45979 nmode = V4SImode;
45981 else if (mode == V4DImode)
45983 umul = gen_vec_widen_umult_even_v8si;
45984 nmode = V8SImode;
45986 else if (mode == V8DImode)
45988 umul = gen_vec_widen_umult_even_v16si;
45989 nmode = V16SImode;
45991 else
45992 gcc_unreachable ();
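/* The steps below follow the usual schoolbook identity (illustrative):
   for 64-bit lanes a and b, a * b mod 2^64
   = lo(a)*lo(b) + ((hi(a)*lo(b) + hi(b)*lo(a)) << 32),
   with every 32x32->64 product done by the widening even multiply.  */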
45995 /* Multiply low parts. */
45996 t1 = gen_reg_rtx (mode);
45997 emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));
45999 /* Shift input vectors right 32 bits so we can multiply high parts. */
46000 t6 = GEN_INT (32);
46001 t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
46002 t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);
46004 /* Multiply high parts by low parts. */
46005 t4 = gen_reg_rtx (mode);
46006 t5 = gen_reg_rtx (mode);
46007 emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
46008 emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));
46010 /* Combine and shift the highparts back. */
46011 t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
46012 t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);
46014 /* Combine high and low parts. */
46015 force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
46018 set_unique_reg_note (get_last_insn (), REG_EQUAL,
46019 gen_rtx_MULT (mode, op1, op2));
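/* A minimal standalone sketch (mine, for illustration) of the 32-bit
   decomposition used on the non-XOP path above: the 64-bit product is
   lo(a)*lo(b) plus the two cross products shifted left by 32; the
   hi(a)*hi(b) term only affects bits above 2^64 and is dropped.  */
#if 0
#include <assert.h>
#include <stdint.h>

int
main (void)
{
  uint64_t a = 0x123456789abcdef0ull, b = 0x0fedcba987654321ull;
  uint32_t a_lo = (uint32_t) a, a_hi = (uint32_t) (a >> 32);
  uint32_t b_lo = (uint32_t) b, b_hi = (uint32_t) (b >> 32);

  uint64_t low   = (uint64_t) a_lo * b_lo;                        /* pmuludq  */
  uint64_t cross = (uint64_t) a_hi * b_lo + (uint64_t) a_lo * b_hi;
  uint64_t prod  = low + (cross << 32);                           /* psllq + paddq  */

  assert (prod == a * b);
  return 0;
}
#endif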
46022 /* Return 1 if control transfer instruction INSN
46023 should be encoded with bnd prefix.
46024 If insn is NULL then return 1 when control
46025 transfer instructions should be prefixed with
46026 bnd by default for the current function. */
46028 bool
46029 ix86_bnd_prefixed_insn_p (rtx insn)
46031 /* For call insns check special flag. */
46032 if (insn && CALL_P (insn))
46034 rtx call = get_call_rtx_from (insn);
46035 if (call)
46036 return CALL_EXPR_WITH_BOUNDS_P (call);
46039 /* All other insns are prefixed only if function is instrumented. */
46040 return chkp_function_instrumented_p (current_function_decl);
46043 /* Calculate integer abs() using only SSE2 instructions. */
46045 void
46046 ix86_expand_sse2_abs (rtx target, rtx input)
46048 enum machine_mode mode = GET_MODE (target);
46049 rtx tmp0, tmp1, x;
46051 switch (mode)
46053 /* For 32-bit signed integer X, the best way to calculate the absolute
46054 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). */
46055 case V4SImode:
46056 tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
46057 GEN_INT (GET_MODE_BITSIZE
46058 (GET_MODE_INNER (mode)) - 1),
46059 NULL, 0, OPTAB_DIRECT);
46060 tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
46061 NULL, 0, OPTAB_DIRECT);
46062 x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
46063 target, 0, OPTAB_DIRECT);
46064 break;
46066 /* For 16-bit signed integer X, the best way to calculate the absolute
46067 value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */
46068 case V8HImode:
46069 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
46071 x = expand_simple_binop (mode, SMAX, tmp0, input,
46072 target, 0, OPTAB_DIRECT);
46073 break;
46075 /* For 8-bit signed integer X, the best way to calculate the absolute
46076 value of X is min ((unsigned char) X, (unsigned char) (-X)),
46077 as SSE2 provides the PMINUB insn. */
46078 case V16QImode:
46079 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
46081 x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
46082 target, 0, OPTAB_DIRECT);
46083 break;
46085 default:
46086 gcc_unreachable ();
46089 if (x != target)
46090 emit_move_insn (target, x);
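/* Standalone scalar sketch (mine) of the three formulas used above; the
   shift of a negative value is assumed to be arithmetic, as GCC
   implements it.  */
#if 0
#include <assert.h>
#include <stdint.h>

int
main (void)
{
  /* 32-bit lanes: x >> 31 is all-ones for negative x, so
     ((x >> 31) ^ x) - (x >> 31) conditionally negates x.  */
  int32_t x = -123456;
  int32_t m = x >> 31;
  assert (((m ^ x) - m) == 123456);

  /* 16-bit lanes: abs (x) == max (x, -x), available as PMAXSW.  */
  int16_t h = -1234, nh = -h;
  assert ((h > nh ? h : nh) == 1234);

  /* 8-bit lanes: abs (x) == min ((unsigned char) x, (unsigned char) -x),
     available as PMINUB.  */
  int8_t q = -42;
  uint8_t uq = (uint8_t) q, unq = (uint8_t) -q;
  assert ((uq < unq ? uq : unq) == 42);
  return 0;
}
#endif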
46093 /* Expand an insert into a vector register through pinsr insn.
46094 Return true if successful. */
46096 bool
46097 ix86_expand_pinsr (rtx *operands)
46099 rtx dst = operands[0];
46100 rtx src = operands[3];
46102 unsigned int size = INTVAL (operands[1]);
46103 unsigned int pos = INTVAL (operands[2]);
46105 if (GET_CODE (dst) == SUBREG)
46107 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
46108 dst = SUBREG_REG (dst);
46111 if (GET_CODE (src) == SUBREG)
46112 src = SUBREG_REG (src);
46114 switch (GET_MODE (dst))
46116 case V16QImode:
46117 case V8HImode:
46118 case V4SImode:
46119 case V2DImode:
46121 enum machine_mode srcmode, dstmode;
46122 rtx (*pinsr)(rtx, rtx, rtx, rtx);
46124 srcmode = mode_for_size (size, MODE_INT, 0);
46126 switch (srcmode)
46128 case QImode:
46129 if (!TARGET_SSE4_1)
46130 return false;
46131 dstmode = V16QImode;
46132 pinsr = gen_sse4_1_pinsrb;
46133 break;
46135 case HImode:
46136 if (!TARGET_SSE2)
46137 return false;
46138 dstmode = V8HImode;
46139 pinsr = gen_sse2_pinsrw;
46140 break;
46142 case SImode:
46143 if (!TARGET_SSE4_1)
46144 return false;
46145 dstmode = V4SImode;
46146 pinsr = gen_sse4_1_pinsrd;
46147 break;
46149 case DImode:
46150 gcc_assert (TARGET_64BIT);
46151 if (!TARGET_SSE4_1)
46152 return false;
46153 dstmode = V2DImode;
46154 pinsr = gen_sse4_1_pinsrq;
46155 break;
46157 default:
46158 return false;
46161 rtx d = dst;
46162 if (GET_MODE (dst) != dstmode)
46163 d = gen_reg_rtx (dstmode);
46164 src = gen_lowpart (srcmode, src);
46166 pos /= size;
46168 emit_insn (pinsr (d, gen_lowpart (dstmode, dst), src,
46169 GEN_INT (1 << pos)));
46170 if (d != dst)
46171 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
46172 return true;
46175 default:
46176 return false;
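/* At the source level the insertions generated here correspond to what
   the SSE intrinsics expose directly; a minimal user-level sketch
   (assumes SSE4.1, e.g. compiled separately with -msse4.1).  */
#if 0
#include <assert.h>
#include <smmintrin.h>

int
main (void)
{
  __m128i v = _mm_set_epi32 (3, 2, 1, 0);
  v = _mm_insert_epi32 (v, 42, 2);          /* PINSRD into 32-bit lane 2.  */
  assert (_mm_extract_epi32 (v, 2) == 42);
  assert (_mm_extract_epi32 (v, 0) == 0);
  return 0;
}
#endif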
46180 /* This function returns the calling abi specific va_list type node.
46181 It returns the FNDECL specific va_list type. */
46183 static tree
46184 ix86_fn_abi_va_list (tree fndecl)
46186 if (!TARGET_64BIT)
46187 return va_list_type_node;
46188 gcc_assert (fndecl != NULL_TREE);
46190 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
46191 return ms_va_list_type_node;
46192 else
46193 return sysv_va_list_type_node;
46196 /* This function returns the size of bounds for the calling ABI
46197 specific va_list node. */
46199 static tree
46200 ix86_fn_abi_va_list_bounds_size (tree fndecl)
46202 if (!TARGET_64BIT)
46203 return integer_zero_node;
46204 gcc_assert (fndecl != NULL_TREE);
46206 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
46207 return integer_zero_node;
46208 else
46209 return TYPE_SIZE (sysv_va_list_type_node);
46212 /* Returns the canonical va_list type specified by TYPE. If there
46213 is no valid TYPE provided, it returns NULL_TREE. */
46215 static tree
46216 ix86_canonical_va_list_type (tree type)
46218 tree wtype, htype;
46220 /* Resolve references and pointers to va_list type. */
46221 if (TREE_CODE (type) == MEM_REF)
46222 type = TREE_TYPE (type);
46223 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
46224 type = TREE_TYPE (type);
46225 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
46226 type = TREE_TYPE (type);
46228 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
46230 wtype = va_list_type_node;
46231 gcc_assert (wtype != NULL_TREE);
46232 htype = type;
46233 if (TREE_CODE (wtype) == ARRAY_TYPE)
46235 /* If va_list is an array type, the argument may have decayed
46236 to a pointer type, e.g. by being passed to another function.
46237 In that case, unwrap both types so that we can compare the
46238 underlying records. */
46239 if (TREE_CODE (htype) == ARRAY_TYPE
46240 || POINTER_TYPE_P (htype))
46242 wtype = TREE_TYPE (wtype);
46243 htype = TREE_TYPE (htype);
46246 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
46247 return va_list_type_node;
46248 wtype = sysv_va_list_type_node;
46249 gcc_assert (wtype != NULL_TREE);
46250 htype = type;
46251 if (TREE_CODE (wtype) == ARRAY_TYPE)
46253 /* If va_list is an array type, the argument may have decayed
46254 to a pointer type, e.g. by being passed to another function.
46255 In that case, unwrap both types so that we can compare the
46256 underlying records. */
46257 if (TREE_CODE (htype) == ARRAY_TYPE
46258 || POINTER_TYPE_P (htype))
46260 wtype = TREE_TYPE (wtype);
46261 htype = TREE_TYPE (htype);
46264 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
46265 return sysv_va_list_type_node;
46266 wtype = ms_va_list_type_node;
46267 gcc_assert (wtype != NULL_TREE);
46268 htype = type;
46269 if (TREE_CODE (wtype) == ARRAY_TYPE)
46271 /* If va_list is an array type, the argument may have decayed
46272 to a pointer type, e.g. by being passed to another function.
46273 In that case, unwrap both types so that we can compare the
46274 underlying records. */
46275 if (TREE_CODE (htype) == ARRAY_TYPE
46276 || POINTER_TYPE_P (htype))
46278 wtype = TREE_TYPE (wtype);
46279 htype = TREE_TYPE (htype);
46282 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
46283 return ms_va_list_type_node;
46284 return NULL_TREE;
46286 return std_canonical_va_list_type (type);
46289 /* Iterate through the target-specific builtin types for va_list.
46290 IDX denotes the iterator, *PTREE is set to the result type of
46291 the va_list builtin, and *PNAME to its internal type.
46292 Returns zero if there is no element for this index, otherwise
46293 IDX should be increased upon the next call.
46294 Note, do not iterate a base builtin's name like __builtin_va_list.
46295 Used from c_common_nodes_and_builtins. */
46297 static int
46298 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
46300 if (TARGET_64BIT)
46302 switch (idx)
46304 default:
46305 break;
46307 case 0:
46308 *ptree = ms_va_list_type_node;
46309 *pname = "__builtin_ms_va_list";
46310 return 1;
46312 case 1:
46313 *ptree = sysv_va_list_type_node;
46314 *pname = "__builtin_sysv_va_list";
46315 return 1;
46319 return 0;
46322 #undef TARGET_SCHED_DISPATCH
46323 #define TARGET_SCHED_DISPATCH has_dispatch
46324 #undef TARGET_SCHED_DISPATCH_DO
46325 #define TARGET_SCHED_DISPATCH_DO do_dispatch
46326 #undef TARGET_SCHED_REASSOCIATION_WIDTH
46327 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
46328 #undef TARGET_SCHED_REORDER
46329 #define TARGET_SCHED_REORDER ix86_sched_reorder
46330 #undef TARGET_SCHED_ADJUST_PRIORITY
46331 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
46332 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
46333 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
46334 ix86_dependencies_evaluation_hook
46336 /* The size of the dispatch window is the total number of bytes of
46337 object code allowed in a window. */
46338 #define DISPATCH_WINDOW_SIZE 16
46340 /* Number of dispatch windows considered for scheduling. */
46341 #define MAX_DISPATCH_WINDOWS 3
46343 /* Maximum number of instructions in a window. */
46344 #define MAX_INSN 4
46346 /* Maximum number of immediate operands in a window. */
46347 #define MAX_IMM 4
46349 /* Maximum number of immediate bits allowed in a window. */
46350 #define MAX_IMM_SIZE 128
46352 /* Maximum number of 32 bit immediates allowed in a window. */
46353 #define MAX_IMM_32 4
46355 /* Maximum number of 64 bit immediates allowed in a window. */
46356 #define MAX_IMM_64 2
46358 /* Maximum total of loads or prefetches allowed in a window. */
46359 #define MAX_LOAD 2
46361 /* Maximum total of stores allowed in a window. */
46362 #define MAX_STORE 1
46364 #undef BIG
46365 #define BIG 100
46368 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
46369 enum dispatch_group {
46370 disp_no_group = 0,
46371 disp_load,
46372 disp_store,
46373 disp_load_store,
46374 disp_prefetch,
46375 disp_imm,
46376 disp_imm_32,
46377 disp_imm_64,
46378 disp_branch,
46379 disp_cmp,
46380 disp_jcc,
46381 disp_last
46384 /* Number of allowable groups in a dispatch window. It is an array
46385 indexed by dispatch_group enum. 100 is used as a big number,
46386 because the number of these kinds of operations does not have any
46387 effect in a dispatch window, but we need them for other reasons in
46388 the table. */
46389 static unsigned int num_allowable_groups[disp_last] = {
46390 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
46393 char group_name[disp_last + 1][16] = {
46394 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
46395 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
46396 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
46399 /* Instruction path. */
46400 enum insn_path {
46401 no_path = 0,
46402 path_single, /* Single micro op. */
46403 path_double, /* Double micro op. */
46404 path_multi, /* Instructions with more than 2 micro ops. */
46405 last_path
46408 /* sched_insn_info defines a window to the instructions scheduled in
46409 the basic block. It contains a pointer to the insn_info table and
46410 the instruction scheduled.
46412 Windows are allocated for each basic block and are linked
46413 together. */
46414 typedef struct sched_insn_info_s {
46415 rtx insn;
46416 enum dispatch_group group;
46417 enum insn_path path;
46418 int byte_len;
46419 int imm_bytes;
46420 } sched_insn_info;
46422 /* Linked list of dispatch windows. This is a two way list of
46423 dispatch windows of a basic block. It contains information about
46424 the number of uops in the window and the total number of
46425 instructions and of bytes in the object code for this dispatch
46426 window. */
46427 typedef struct dispatch_windows_s {
46428 int num_insn; /* Number of insns in the window. */
46429 int num_uops; /* Number of uops in the window. */
46430 int window_size; /* Number of bytes in the window. */
46431 int window_num; /* Window number, either 0 or 1. */
46432 int num_imm; /* Number of immediates in an insn. */
46433 int num_imm_32; /* Number of 32 bit immediates in an insn. */
46434 int num_imm_64; /* Number of 64 bit immediates in an insn. */
46435 int imm_size; /* Total immediates in the window. */
46436 int num_loads; /* Total memory loads in the window. */
46437 int num_stores; /* Total memory stores in the window. */
46438 int violation; /* Violation exists in window. */
46439 sched_insn_info *window; /* Pointer to the window. */
46440 struct dispatch_windows_s *next;
46441 struct dispatch_windows_s *prev;
46442 } dispatch_windows;
46444 /* Immediate values used in an insn. */
46445 typedef struct imm_info_s
46447 int imm;
46448 int imm32;
46449 int imm64;
46450 } imm_info;
46452 static dispatch_windows *dispatch_window_list;
46453 static dispatch_windows *dispatch_window_list1;
46455 /* Get dispatch group of insn. */
46457 static enum dispatch_group
46458 get_mem_group (rtx insn)
46460 enum attr_memory memory;
46462 if (INSN_CODE (insn) < 0)
46463 return disp_no_group;
46464 memory = get_attr_memory (insn);
46465 if (memory == MEMORY_STORE)
46466 return disp_store;
46468 if (memory == MEMORY_LOAD)
46469 return disp_load;
46471 if (memory == MEMORY_BOTH)
46472 return disp_load_store;
46474 return disp_no_group;
46477 /* Return true if insn is a compare instruction. */
46479 static bool
46480 is_cmp (rtx insn)
46482 enum attr_type type;
46484 type = get_attr_type (insn);
46485 return (type == TYPE_TEST
46486 || type == TYPE_ICMP
46487 || type == TYPE_FCMP
46488 || GET_CODE (PATTERN (insn)) == COMPARE);
46491 /* Return true if a dispatch violation was encountered. */
46493 static bool
46494 dispatch_violation (void)
46496 if (dispatch_window_list->next)
46497 return dispatch_window_list->next->violation;
46498 return dispatch_window_list->violation;
46501 /* Return true if insn is a branch instruction. */
46503 static bool
46504 is_branch (rtx insn)
46506 return (CALL_P (insn) || JUMP_P (insn));
46509 /* Return true if insn is a prefetch instruction. */
46511 static bool
46512 is_prefetch (rtx insn)
46514 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
46517 /* This function initializes a dispatch window and the list container holding a
46518 pointer to the window. */
46520 static void
46521 init_window (int window_num)
46523 int i;
46524 dispatch_windows *new_list;
46526 if (window_num == 0)
46527 new_list = dispatch_window_list;
46528 else
46529 new_list = dispatch_window_list1;
46531 new_list->num_insn = 0;
46532 new_list->num_uops = 0;
46533 new_list->window_size = 0;
46534 new_list->next = NULL;
46535 new_list->prev = NULL;
46536 new_list->window_num = window_num;
46537 new_list->num_imm = 0;
46538 new_list->num_imm_32 = 0;
46539 new_list->num_imm_64 = 0;
46540 new_list->imm_size = 0;
46541 new_list->num_loads = 0;
46542 new_list->num_stores = 0;
46543 new_list->violation = false;
46545 for (i = 0; i < MAX_INSN; i++)
46547 new_list->window[i].insn = NULL;
46548 new_list->window[i].group = disp_no_group;
46549 new_list->window[i].path = no_path;
46550 new_list->window[i].byte_len = 0;
46551 new_list->window[i].imm_bytes = 0;
46553 return;
46556 /* This function allocates and initializes a dispatch window and the
46557 list container holding a pointer to the window. */
46559 static dispatch_windows *
46560 allocate_window (void)
46562 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
46563 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
46565 return new_list;
46568 /* This routine initializes the dispatch scheduling information. It
46569 initiates building dispatch scheduler tables and constructs the
46570 first dispatch window. */
46572 static void
46573 init_dispatch_sched (void)
46575 /* Allocate a dispatch list and a window. */
46576 dispatch_window_list = allocate_window ();
46577 dispatch_window_list1 = allocate_window ();
46578 init_window (0);
46579 init_window (1);
46582 /* This function returns true if a branch is detected. End of a basic block
46583 does not have to be a branch, but here we assume only branches end a
46584 window. */
46586 static bool
46587 is_end_basic_block (enum dispatch_group group)
46589 return group == disp_branch;
46592 /* This function is called when the end of a window processing is reached. */
46594 static void
46595 process_end_window (void)
46597 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
46598 if (dispatch_window_list->next)
46600 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
46601 gcc_assert (dispatch_window_list->window_size
46602 + dispatch_window_list1->window_size <= 48);
46603 init_window (1);
46605 init_window (0);
46608 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
46609 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
46610 for 48 bytes of instructions. Note that these windows are not dispatch
46611 windows whose sizes are DISPATCH_WINDOW_SIZE. */
46613 static dispatch_windows *
46614 allocate_next_window (int window_num)
46616 if (window_num == 0)
46618 if (dispatch_window_list->next)
46619 init_window (1);
46620 init_window (0);
46621 return dispatch_window_list;
46624 dispatch_window_list->next = dispatch_window_list1;
46625 dispatch_window_list1->prev = dispatch_window_list;
46627 return dispatch_window_list1;
46630 /* Increment the number of immediate operands of an instruction. */
46632 static int
46633 find_constant_1 (rtx *in_rtx, imm_info *imm_values)
46635 if (*in_rtx == 0)
46636 return 0;
46638 switch (GET_CODE (*in_rtx))
46640 case CONST:
46641 case SYMBOL_REF:
46642 case CONST_INT:
46643 (imm_values->imm)++;
46644 if (x86_64_immediate_operand (*in_rtx, SImode))
46645 (imm_values->imm32)++;
46646 else
46647 (imm_values->imm64)++;
46648 break;
46650 case CONST_DOUBLE:
46651 (imm_values->imm)++;
46652 (imm_values->imm64)++;
46653 break;
46655 case CODE_LABEL:
46656 if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
46658 (imm_values->imm)++;
46659 (imm_values->imm32)++;
46661 break;
46663 default:
46664 break;
46667 return 0;
46670 /* Compute number of immediate operands of an instruction. */
46672 static void
46673 find_constant (rtx in_rtx, imm_info *imm_values)
46675 for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
46676 (rtx_function) find_constant_1, (void *) imm_values);
46679 /* Return the total size of the immediate operands of an instruction along with
46680 the number of corresponding immediate operands. It initializes its parameters
46681 to zero before calling FIND_CONSTANT.
46682 INSN is the input instruction. IMM is the total of immediates.
46683 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
46684 bit immediates. */
46686 static int
46687 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
46689 imm_info imm_values = {0, 0, 0};
46691 find_constant (insn, &imm_values);
46692 *imm = imm_values.imm;
46693 *imm32 = imm_values.imm32;
46694 *imm64 = imm_values.imm64;
46695 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
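/* Worked example (numbers mine, for illustration): for "movl $5, %eax"
   the pattern contains a single CONST_INT that satisfies
   x86_64_immediate_operand in SImode, so *IMM = 1, *IMM32 = 1,
   *IMM64 = 0 and the function returns 1 * 4 + 0 * 8 = 4 bytes of
   immediates.  */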
46698 /* This function indicates whether any operand of an instruction is an
46699 immediate. */
46701 static bool
46702 has_immediate (rtx insn)
46704 int num_imm_operand;
46705 int num_imm32_operand;
46706 int num_imm64_operand;
46708 if (insn)
46709 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
46710 &num_imm64_operand);
46711 return false;
46714 /* Return single or double path for instructions. */
46716 static enum insn_path
46717 get_insn_path (rtx insn)
46719 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
46721 if ((int)path == 0)
46722 return path_single;
46724 if ((int)path == 1)
46725 return path_double;
46727 return path_multi;
46730 /* Return insn dispatch group. */
46732 static enum dispatch_group
46733 get_insn_group (rtx insn)
46735 enum dispatch_group group = get_mem_group (insn);
46736 if (group)
46737 return group;
46739 if (is_branch (insn))
46740 return disp_branch;
46742 if (is_cmp (insn))
46743 return disp_cmp;
46745 if (has_immediate (insn))
46746 return disp_imm;
46748 if (is_prefetch (insn))
46749 return disp_prefetch;
46751 return disp_no_group;
46754 /* Count number of GROUP restricted instructions in a dispatch
46755 window WINDOW_LIST. */
46757 static int
46758 count_num_restricted (rtx insn, dispatch_windows *window_list)
46760 enum dispatch_group group = get_insn_group (insn);
46761 int imm_size;
46762 int num_imm_operand;
46763 int num_imm32_operand;
46764 int num_imm64_operand;
46766 if (group == disp_no_group)
46767 return 0;
46769 if (group == disp_imm)
46771 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
46772 &num_imm64_operand);
46773 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
46774 || num_imm_operand + window_list->num_imm > MAX_IMM
46775 || (num_imm32_operand > 0
46776 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
46777 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
46778 || (num_imm64_operand > 0
46779 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
46780 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
46781 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
46782 && num_imm64_operand > 0
46783 && ((window_list->num_imm_64 > 0
46784 && window_list->num_insn >= 2)
46785 || window_list->num_insn >= 3)))
46786 return BIG;
46788 return 1;
46791 if ((group == disp_load_store
46792 && (window_list->num_loads >= MAX_LOAD
46793 || window_list->num_stores >= MAX_STORE))
46794 || ((group == disp_load
46795 || group == disp_prefetch)
46796 && window_list->num_loads >= MAX_LOAD)
46797 || (group == disp_store
46798 && window_list->num_stores >= MAX_STORE))
46799 return BIG;
46801 return 1;
46804 /* This function returns true if insn satisfies dispatch rules on the
46805 last window scheduled. */
46807 static bool
46808 fits_dispatch_window (rtx insn)
46810 dispatch_windows *window_list = dispatch_window_list;
46811 dispatch_windows *window_list_next = dispatch_window_list->next;
46812 unsigned int num_restrict;
46813 enum dispatch_group group = get_insn_group (insn);
46814 enum insn_path path = get_insn_path (insn);
46815 int sum;
46817 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
46818 instructions should be given the lowest priority in the
46819 scheduling process in the Haifa scheduler to make sure they will be
46820 scheduled in the same dispatch window as the reference to them. */
46821 if (group == disp_jcc || group == disp_cmp)
46822 return false;
46824 /* Check nonrestricted. */
46825 if (group == disp_no_group || group == disp_branch)
46826 return true;
46828 /* Get last dispatch window. */
46829 if (window_list_next)
46830 window_list = window_list_next;
46832 if (window_list->window_num == 1)
46834 sum = window_list->prev->window_size + window_list->window_size;
46836 if (sum == 32
46837 || (min_insn_size (insn) + sum) >= 48)
46838 /* Window 1 is full. Go for next window. */
46839 return true;
46842 num_restrict = count_num_restricted (insn, window_list);
46844 if (num_restrict > num_allowable_groups[group])
46845 return false;
46847 /* See if it fits in the first window. */
46848 if (window_list->window_num == 0)
46850 /* The first window should have only single and double path
46851 uops. */
46852 if (path == path_double
46853 && (window_list->num_uops + 2) > MAX_INSN)
46854 return false;
46855 else if (path != path_single)
46856 return false;
46858 return true;
46861 /* Add an instruction INSN with NUM_UOPS micro-operations to the
46862 dispatch window WINDOW_LIST. */
46864 static void
46865 add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
46867 int byte_len = min_insn_size (insn);
46868 int num_insn = window_list->num_insn;
46869 int imm_size;
46870 sched_insn_info *window = window_list->window;
46871 enum dispatch_group group = get_insn_group (insn);
46872 enum insn_path path = get_insn_path (insn);
46873 int num_imm_operand;
46874 int num_imm32_operand;
46875 int num_imm64_operand;
46877 if (!window_list->violation && group != disp_cmp
46878 && !fits_dispatch_window (insn))
46879 window_list->violation = true;
46881 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
46882 &num_imm64_operand);
46884 /* Initialize window with new instruction. */
46885 window[num_insn].insn = insn;
46886 window[num_insn].byte_len = byte_len;
46887 window[num_insn].group = group;
46888 window[num_insn].path = path;
46889 window[num_insn].imm_bytes = imm_size;
46891 window_list->window_size += byte_len;
46892 window_list->num_insn = num_insn + 1;
46893 window_list->num_uops = window_list->num_uops + num_uops;
46894 window_list->imm_size += imm_size;
46895 window_list->num_imm += num_imm_operand;
46896 window_list->num_imm_32 += num_imm32_operand;
46897 window_list->num_imm_64 += num_imm64_operand;
46899 if (group == disp_store)
46900 window_list->num_stores += 1;
46901 else if (group == disp_load
46902 || group == disp_prefetch)
46903 window_list->num_loads += 1;
46904 else if (group == disp_load_store)
46906 window_list->num_stores += 1;
46907 window_list->num_loads += 1;
46911 /* Adds a scheduled instruction, INSN, to the current dispatch window.
46912 If the total bytes of instructions or the number of instructions in
46913 the window exceeds the allowed maximum, it allocates a new window. */
46915 static void
46916 add_to_dispatch_window (rtx insn)
46918 int byte_len;
46919 dispatch_windows *window_list;
46920 dispatch_windows *next_list;
46921 dispatch_windows *window0_list;
46922 enum insn_path path;
46923 enum dispatch_group insn_group;
46924 bool insn_fits;
46925 int num_insn;
46926 int num_uops;
46927 int window_num;
46928 int insn_num_uops;
46929 int sum;
46931 if (INSN_CODE (insn) < 0)
46932 return;
46934 byte_len = min_insn_size (insn);
46935 window_list = dispatch_window_list;
46936 next_list = window_list->next;
46937 path = get_insn_path (insn);
46938 insn_group = get_insn_group (insn);
46940 /* Get the last dispatch window. */
46941 if (next_list)
46942 window_list = dispatch_window_list->next;
46944 if (path == path_single)
46945 insn_num_uops = 1;
46946 else if (path == path_double)
46947 insn_num_uops = 2;
46948 else
46949 insn_num_uops = (int) path;
46951 /* If the current window is full, get a new window.
46952 Window number zero is full if MAX_INSN uops are scheduled in it.
46953 Window number one is full if window zero's bytes plus window
46954 one's bytes total 32, or if adding the bytes of the new instruction
46955 would make the total greater than 48, or if it already has MAX_INSN
46956 instructions in it. */
46957 num_insn = window_list->num_insn;
46958 num_uops = window_list->num_uops;
46959 window_num = window_list->window_num;
46960 insn_fits = fits_dispatch_window (insn);
46962 if (num_insn >= MAX_INSN
46963 || num_uops + insn_num_uops > MAX_INSN
46964 || !(insn_fits))
46966 window_num = ~window_num & 1;
46967 window_list = allocate_next_window (window_num);
46970 if (window_num == 0)
46972 add_insn_window (insn, window_list, insn_num_uops);
46973 if (window_list->num_insn >= MAX_INSN
46974 && insn_group == disp_branch)
46976 process_end_window ();
46977 return;
46980 else if (window_num == 1)
46982 window0_list = window_list->prev;
46983 sum = window0_list->window_size + window_list->window_size;
46984 if (sum == 32
46985 || (byte_len + sum) >= 48)
46987 process_end_window ();
46988 window_list = dispatch_window_list;
46991 add_insn_window (insn, window_list, insn_num_uops);
46993 else
46994 gcc_unreachable ();
46996 if (is_end_basic_block (insn_group))
46998 /* End of basic block is reached; do end-of-basic-block processing. */
46999 process_end_window ();
47000 return;
47004 /* Print the dispatch window, WINDOW_NUM, to FILE. */
47006 DEBUG_FUNCTION static void
47007 debug_dispatch_window_file (FILE *file, int window_num)
47009 dispatch_windows *list;
47010 int i;
47012 if (window_num == 0)
47013 list = dispatch_window_list;
47014 else
47015 list = dispatch_window_list1;
47017 fprintf (file, "Window #%d:\n", list->window_num);
47018 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
47019 list->num_insn, list->num_uops, list->window_size);
47020 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
47021 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
47023 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
47024 list->num_stores);
47025 fprintf (file, " insn info:\n");
47027 for (i = 0; i < MAX_INSN; i++)
47029 if (!list->window[i].insn)
47030 break;
47031 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
47032 i, group_name[list->window[i].group],
47033 i, (void *)list->window[i].insn,
47034 i, list->window[i].path,
47035 i, list->window[i].byte_len,
47036 i, list->window[i].imm_bytes);
47040 /* Print to stdout a dispatch window. */
47042 DEBUG_FUNCTION void
47043 debug_dispatch_window (int window_num)
47045 debug_dispatch_window_file (stdout, window_num);
47048 /* Print INSN dispatch information to FILE. */
47050 DEBUG_FUNCTION static void
47051 debug_insn_dispatch_info_file (FILE *file, rtx insn)
47053 int byte_len;
47054 enum insn_path path;
47055 enum dispatch_group group;
47056 int imm_size;
47057 int num_imm_operand;
47058 int num_imm32_operand;
47059 int num_imm64_operand;
47061 if (INSN_CODE (insn) < 0)
47062 return;
47064 byte_len = min_insn_size (insn);
47065 path = get_insn_path (insn);
47066 group = get_insn_group (insn);
47067 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
47068 &num_imm64_operand);
47070 fprintf (file, " insn info:\n");
47071 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
47072 group_name[group], path, byte_len);
47073 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
47074 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
47077 /* Print to STDERR the status of the ready list with respect to
47078 dispatch windows. */
47080 DEBUG_FUNCTION void
47081 debug_ready_dispatch (void)
47083 int i;
47084 int no_ready = number_in_ready ();
47086 fprintf (stdout, "Number of ready: %d\n", no_ready);
47088 for (i = 0; i < no_ready; i++)
47089 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
47092 /* This routine is the driver of the dispatch scheduler. */
47094 static void
47095 do_dispatch (rtx insn, int mode)
47097 if (mode == DISPATCH_INIT)
47098 init_dispatch_sched ();
47099 else if (mode == ADD_TO_DISPATCH_WINDOW)
47100 add_to_dispatch_window (insn);
47103 /* Return TRUE if Dispatch Scheduling is supported. */
47105 static bool
47106 has_dispatch (rtx insn, int action)
47108 if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3 || TARGET_BDVER4)
47109 && flag_dispatch_scheduler)
47110 switch (action)
47112 default:
47113 return false;
47115 case IS_DISPATCH_ON:
47116 return true;
47117 break;
47119 case IS_CMP:
47120 return is_cmp (insn);
47122 case DISPATCH_VIOLATION:
47123 return dispatch_violation ();
47125 case FITS_DISPATCH_WINDOW:
47126 return fits_dispatch_window (insn);
47129 return false;
47132 /* Implementation of reassociation_width target hook used by
47133 reassoc phase to identify parallelism level in reassociated
47134 tree. The statement's tree_code is passed in OPC. The arguments' type
47135 is passed in MODE.
47137 Currently parallel reassociation is enabled for Atom
47138 processors only and we set reassociation width to be 2
47139 because Atom may issue up to 2 instructions per cycle.
47141 Return value should be fixed if parallel reassociation is
47142 enabled for other processors. */
47144 static int
47145 ix86_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
47146 enum machine_mode mode)
47148 int res = 1;
47150 if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
47151 res = 2;
47152 else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
47153 res = 2;
47155 return res;
47158 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
47159 place emms and femms instructions. */
47161 static enum machine_mode
47162 ix86_preferred_simd_mode (enum machine_mode mode)
47164 if (!TARGET_SSE)
47165 return word_mode;
47167 switch (mode)
47169 case QImode:
47170 return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
47171 case HImode:
47172 return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
47173 case SImode:
47174 return TARGET_AVX512F ? V16SImode :
47175 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
47176 case DImode:
47177 return TARGET_AVX512F ? V8DImode :
47178 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;
47180 case SFmode:
47181 if (TARGET_AVX512F)
47182 return V16SFmode;
47183 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
47184 return V8SFmode;
47185 else
47186 return V4SFmode;
47188 case DFmode:
47189 if (!TARGET_VECTORIZE_DOUBLE)
47190 return word_mode;
47191 else if (TARGET_AVX512F)
47192 return V8DFmode;
47193 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
47194 return V4DFmode;
47195 else if (TARGET_SSE2)
47196 return V2DFmode;
47197 /* FALLTHRU */
47199 default:
47200 return word_mode;
47204 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
47205 vectors. If AVX512F is enabled then try vectorizing with 512bit,
47206 256bit and 128bit vectors. */
47208 static unsigned int
47209 ix86_autovectorize_vector_sizes (void)
47211 return TARGET_AVX512F ? 64 | 32 | 16 :
47212 (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
47217 /* Return class of registers which could be used for pseudo of MODE
47218 and of class RCLASS for spilling instead of memory. Return NO_REGS
47219 if it is not possible or non-profitable. */
47220 static reg_class_t
47221 ix86_spill_class (reg_class_t rclass, enum machine_mode mode)
47223 if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL && ! TARGET_MMX
47224 && (mode == SImode || (TARGET_64BIT && mode == DImode))
47225 && INTEGER_CLASS_P (rclass))
47226 return ALL_SSE_REGS;
47227 return NO_REGS;
47230 /* Implement targetm.vectorize.init_cost. */
47232 static void *
47233 ix86_init_cost (struct loop *loop_info ATTRIBUTE_UNUSED)
47235 unsigned *cost = XNEWVEC (unsigned, 3);
47236 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
47237 return cost;
47240 /* Implement targetm.vectorize.add_stmt_cost. */
47242 static unsigned
47243 ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
47244 struct _stmt_vec_info *stmt_info, int misalign,
47245 enum vect_cost_model_location where)
47247 unsigned *cost = (unsigned *) data;
47248 unsigned retval = 0;
47250 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
47251 int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
47253 /* Statements in an inner loop relative to the loop being
47254 vectorized are weighted more heavily. The value here is
47255 arbitrary and could potentially be improved with analysis. */
47256 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
47257 count *= 50; /* FIXME. */
47259 retval = (unsigned) (count * stmt_cost);
47260 cost[where] += retval;
47262 return retval;
47265 /* Implement targetm.vectorize.finish_cost. */
47267 static void
47268 ix86_finish_cost (void *data, unsigned *prologue_cost,
47269 unsigned *body_cost, unsigned *epilogue_cost)
47271 unsigned *cost = (unsigned *) data;
47272 *prologue_cost = cost[vect_prologue];
47273 *body_cost = cost[vect_body];
47274 *epilogue_cost = cost[vect_epilogue];
47277 /* Implement targetm.vectorize.destroy_cost_data. */
47279 static void
47280 ix86_destroy_cost_data (void *data)
47282 free (data);
47285 /* Validate target specific memory model bits in VAL. */
47287 static unsigned HOST_WIDE_INT
47288 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
47290 unsigned HOST_WIDE_INT model = val & MEMMODEL_MASK;
47291 bool strong;
47293 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
47294 |MEMMODEL_MASK)
47295 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
47297 warning (OPT_Winvalid_memory_model,
47298 "Unknown architecture specific memory model");
47299 return MEMMODEL_SEQ_CST;
47301 strong = (model == MEMMODEL_ACQ_REL || model == MEMMODEL_SEQ_CST);
47302 if (val & IX86_HLE_ACQUIRE && !(model == MEMMODEL_ACQUIRE || strong))
47304 warning (OPT_Winvalid_memory_model,
47305 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
47306 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
47308 if (val & IX86_HLE_RELEASE && !(model == MEMMODEL_RELEASE || strong))
47310 warning (OPT_Winvalid_memory_model,
47311 "HLE_RELEASE not used with RELEASE or stronger memory model");
47312 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
47314 return val;
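/* The HLE bits validated above are the same ones user code ORs into the
   memory model argument of the atomic built-ins.  A minimal sketch of
   valid usage, assuming the __ATOMIC_HLE_ACQUIRE / __ATOMIC_HLE_RELEASE
   macros GCC predefines for x86 when HLE is enabled (e.g. -mhle).  */
#if 0
static int lock;

void
critical_section (void)
{
  /* HLE_ACQUIRE must be combined with an acquire-or-stronger model and
     HLE_RELEASE with a release-or-stronger one; other combinations are
     diagnosed by ix86_memmodel_check above.  */
  while (__atomic_exchange_n (&lock, 1,
			      __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE))
    ;
  /* ... critical section ... */
  __atomic_store_n (&lock, 0, __ATOMIC_RELEASE | __ATOMIC_HLE_RELEASE);
}
#endif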
47317 /* Set CLONEI->vecsize_mangle, CLONEI->vecsize_int,
47318 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
47319 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
47320 or number of vecsize_mangle variants that should be emitted. */
47322 static int
47323 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
47324 struct cgraph_simd_clone *clonei,
47325 tree base_type, int num)
47327 int ret = 1;
47329 if (clonei->simdlen
47330 && (clonei->simdlen < 2
47331 || clonei->simdlen > 16
47332 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
47334 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
47335 "unsupported simdlen %d", clonei->simdlen);
47336 return 0;
47339 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
47340 if (TREE_CODE (ret_type) != VOID_TYPE)
47341 switch (TYPE_MODE (ret_type))
47343 case QImode:
47344 case HImode:
47345 case SImode:
47346 case DImode:
47347 case SFmode:
47348 case DFmode:
47349 /* case SCmode: */
47350 /* case DCmode: */
47351 break;
47352 default:
47353 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
47354 "unsupported return type %qT for simd\n", ret_type);
47355 return 0;
47358 tree t;
47359 int i;
47361 for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
47362 /* FIXME: Shouldn't we allow such arguments if they are uniform? */
47363 switch (TYPE_MODE (TREE_TYPE (t)))
47365 case QImode:
47366 case HImode:
47367 case SImode:
47368 case DImode:
47369 case SFmode:
47370 case DFmode:
47371 /* case SCmode: */
47372 /* case DCmode: */
47373 break;
47374 default:
47375 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
47376 "unsupported argument type %qT for simd\n", TREE_TYPE (t));
47377 return 0;
47380 if (clonei->cilk_elemental)
47382 /* Parse the processor clause here. If not present, default to 'b'. */
47383 clonei->vecsize_mangle = 'b';
47385 else if (!TREE_PUBLIC (node->decl))
47387 /* If the function isn't exported, we can pick up just one ISA
47388 for the clones. */
47389 if (TARGET_AVX2)
47390 clonei->vecsize_mangle = 'd';
47391 else if (TARGET_AVX)
47392 clonei->vecsize_mangle = 'c';
47393 else
47394 clonei->vecsize_mangle = 'b';
47395 ret = 1;
47397 else
47399 clonei->vecsize_mangle = "bcd"[num];
47400 ret = 3;
47402 switch (clonei->vecsize_mangle)
47404 case 'b':
47405 clonei->vecsize_int = 128;
47406 clonei->vecsize_float = 128;
47407 break;
47408 case 'c':
47409 clonei->vecsize_int = 128;
47410 clonei->vecsize_float = 256;
47411 break;
47412 case 'd':
47413 clonei->vecsize_int = 256;
47414 clonei->vecsize_float = 256;
47415 break;
47417 if (clonei->simdlen == 0)
47419 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
47420 clonei->simdlen = clonei->vecsize_int;
47421 else
47422 clonei->simdlen = clonei->vecsize_float;
47423 clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
47424 if (clonei->simdlen > 16)
47425 clonei->simdlen = 16;
47427 return ret;
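/* Worked example (derived from the code above, not normative): for an
   exported clone with a float base type and no explicit simdlen, the
   three mangles give vecsize_float of 128 ('b'), 256 ('c') and 256 ('d'),
   so simdlen becomes 128/32 = 4, 256/32 = 8 and 256/32 = 8 respectively;
   an int base type with mangle 'd' gives 256/32 = 8.  */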
47430 /* Add target attribute to SIMD clone NODE if needed. */
47432 static void
47433 ix86_simd_clone_adjust (struct cgraph_node *node)
47435 const char *str = NULL;
47436 gcc_assert (node->decl == cfun->decl);
47437 switch (node->simdclone->vecsize_mangle)
47439 case 'b':
47440 if (!TARGET_SSE2)
47441 str = "sse2";
47442 break;
47443 case 'c':
47444 if (!TARGET_AVX)
47445 str = "avx";
47446 break;
47447 case 'd':
47448 if (!TARGET_AVX2)
47449 str = "avx2";
47450 break;
47451 default:
47452 gcc_unreachable ();
47454 if (str == NULL)
47455 return;
47456 push_cfun (NULL);
47457 tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str));
47458 bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
47459 gcc_assert (ok);
47460 pop_cfun ();
47461 ix86_previous_fndecl = NULL_TREE;
47462 ix86_set_current_function (node->decl);
47465 /* If SIMD clone NODE can't be used in a vectorized loop
47466 in the current function, return -1; otherwise return the badness of using it
47467 (0 if it is the most desirable from the vecsize_mangle point of view, 1
47468 slightly less desirable, etc.). */
47470 static int
47471 ix86_simd_clone_usable (struct cgraph_node *node)
47473 switch (node->simdclone->vecsize_mangle)
47475 case 'b':
47476 if (!TARGET_SSE2)
47477 return -1;
47478 if (!TARGET_AVX)
47479 return 0;
47480 return TARGET_AVX2 ? 2 : 1;
47481 case 'c':
47482 if (!TARGET_AVX)
47483 return -1;
47484 return TARGET_AVX2 ? 1 : 0;
47485 break;
47486 case 'd':
47487 if (!TARGET_AVX2)
47488 return -1;
47489 return 0;
47490 default:
47491 gcc_unreachable ();
47495 /* This function counts the number of memory references.
47496 This value determines the unrolling factor for
47497 bdver3 and bdver4 architectures. */
47499 static int
47500 ix86_loop_memcount (rtx *x, unsigned *mem_count)
47502 if (*x != NULL_RTX && MEM_P (*x))
47504 enum machine_mode mode;
47505 unsigned int n_words;
47507 mode = GET_MODE (*x);
47508 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
47510 if (n_words > 4)
47511 (*mem_count)+=2;
47512 else
47513 (*mem_count)+=1;
47515 return 0;
47518 /* This function adjusts the unroll factor based on
47519 the hardware capabilities. For example, bdver3 has
47520 a loop buffer which makes unrolling of smaller
47521 loops less important. This function decides the
47522 unroll factor using the number of memory references
47523 (the value 32 is used) as a heuristic. */
47525 static unsigned
47526 ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
47528 basic_block *bbs;
47529 rtx insn;
47530 unsigned i;
47531 unsigned mem_count = 0;
47533 if (!TARGET_ADJUST_UNROLL)
47534 return nunroll;
47536 /* Count the number of memory references within the loop body. */
47537 bbs = get_loop_body (loop);
47538 for (i = 0; i < loop->num_nodes; i++)
47540 for (insn = BB_HEAD (bbs[i]); insn != BB_END (bbs[i]); insn = NEXT_INSN (insn))
47541 if (NONDEBUG_INSN_P (insn))
47542 for_each_rtx (&insn, (rtx_function) ix86_loop_memcount, &mem_count);
47544 free (bbs);
47546 if (mem_count && mem_count <= 32)
47547 return 32 / mem_count;
47549 return nunroll;
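/* Worked example (mine): a loop body with four word-sized memory
   references gets mem_count = 4 and therefore an unroll factor of
   32 / 4 = 8; a reference wider than four words counts twice, and a
   loop with no references or more than 32 keeps the caller's NUNROLL.  */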
47553 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
47555 static bool
47556 ix86_float_exceptions_rounding_supported_p (void)
47558 /* For x87 floating point with standard excess precision handling,
47559 there is no adddf3 pattern (since x87 floating point only has
47560 XFmode operations) so the default hook implementation gets this
47561 wrong. */
47562 return TARGET_80387 || TARGET_SSE_MATH;
47565 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
47567 static void
47568 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
47570 if (!TARGET_80387 && !TARGET_SSE_MATH)
47571 return;
47572 tree exceptions_var = create_tmp_var (integer_type_node, NULL);
47573 if (TARGET_80387)
47575 tree fenv_index_type = build_index_type (size_int (6));
47576 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
47577 tree fenv_var = create_tmp_var (fenv_type, NULL);
47578 mark_addressable (fenv_var);
47579 tree fenv_ptr = build_pointer_type (fenv_type);
47580 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
47581 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
47582 tree fnstenv = ix86_builtins[IX86_BUILTIN_FNSTENV];
47583 tree fldenv = ix86_builtins[IX86_BUILTIN_FLDENV];
47584 tree fnstsw = ix86_builtins[IX86_BUILTIN_FNSTSW];
47585 tree fnclex = ix86_builtins[IX86_BUILTIN_FNCLEX];
47586 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
47587 tree hold_fnclex = build_call_expr (fnclex, 0);
47588 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_fnstenv,
47589 hold_fnclex);
47590 *clear = build_call_expr (fnclex, 0);
47591 tree sw_var = create_tmp_var (short_unsigned_type_node, NULL);
47592 mark_addressable (sw_var);
47593 tree su_ptr = build_pointer_type (short_unsigned_type_node);
47594 tree sw_addr = build1 (ADDR_EXPR, su_ptr, sw_var);
47595 tree fnstsw_call = build_call_expr (fnstsw, 1, sw_addr);
47596 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
47597 tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
47598 exceptions_var, exceptions_x87);
47599 *update = build2 (COMPOUND_EXPR, integer_type_node,
47600 fnstsw_call, update_mod);
47601 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
47602 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
47604 if (TARGET_SSE_MATH)
47606 tree mxcsr_orig_var = create_tmp_var (unsigned_type_node, NULL);
47607 tree mxcsr_mod_var = create_tmp_var (unsigned_type_node, NULL);
47608 tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR];
47609 tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR];
47610 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
47611 tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node,
47612 mxcsr_orig_var, stmxcsr_hold_call);
47613 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
47614 mxcsr_orig_var,
47615 build_int_cst (unsigned_type_node, 0x1f80));
47616 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
47617 build_int_cst (unsigned_type_node, 0xffffffc0));
47618 tree hold_assign_mod = build2 (MODIFY_EXPR, unsigned_type_node,
47619 mxcsr_mod_var, hold_mod_val);
47620 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
47621 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
47622 hold_assign_orig, hold_assign_mod);
47623 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
47624 ldmxcsr_hold_call);
47625 if (*hold)
47626 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
47627 else
47628 *hold = hold_all;
47629 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
47630 if (*clear)
47631 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
47632 ldmxcsr_clear_call);
47633 else
47634 *clear = ldmxcsr_clear_call;
47635 tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
47636 tree exceptions_sse = fold_convert (integer_type_node,
47637 stxmcsr_update_call);
47638 if (*update)
47640 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
47641 exceptions_var, exceptions_sse);
47642 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
47643 exceptions_var, exceptions_mod);
47644 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
47645 exceptions_assign);
47647 else
47648 *update = build2 (MODIFY_EXPR, integer_type_node,
47649 exceptions_var, exceptions_sse);
47650 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
47651 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
47652 ldmxcsr_update_call);
47654 tree atomic_feraiseexcept
47655 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
47656 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
47657 1, exceptions_var);
47658 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
47659 atomic_feraiseexcept_call);
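/* A standalone C-level approximation (SSE path only, helper names mine)
   of the hold/clear/update sequence constructed above; the x87 path
   additionally saves and restores the FPU environment with
   fnstenv/fldenv.  Requires SSE (e.g. compiled separately with -msse).  */
#if 0
#include <xmmintrin.h>

static unsigned int mxcsr_orig, mxcsr_mod;
static int exceptions;

static void
fenv_hold (void)
{
  mxcsr_orig = _mm_getcsr ();
  /* Set the exception mask bits (7..12) and clear the sticky
     exception flags (bits 0..5).  */
  mxcsr_mod = (mxcsr_orig | 0x1f80) & 0xffffffc0;
  _mm_setcsr (mxcsr_mod);
}

static void
fenv_clear (void)
{
  _mm_setcsr (mxcsr_mod);
}

static void
fenv_update (void)
{
  exceptions = _mm_getcsr ();   /* Collect the raised exception flags.  */
  _mm_setcsr (mxcsr_orig);      /* Restore the original MXCSR.  */
  /* The real expansion then calls __atomic_feraiseexcept on the
     accumulated flags.  */
}
#endif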
47662 static enum machine_mode
47663 ix86_mpx_bound_mode ()
47665 /* Do not support pointer checker if MPX
47666 is not enabled. */
47667 if (!TARGET_MPX)
47669 if (flag_check_pointer_bounds)
47670 warning (0, "Pointer Checker requires MPX support on this target."
47671 " Use -mmpx options to enable MPX.");
47672 return VOIDmode;
47675 return BNDmode;
47678 static tree
47679 ix86_make_bounds_constant (HOST_WIDE_INT lb, HOST_WIDE_INT ub)
47681 tree low = lb ? build_minus_one_cst (pointer_sized_int_node)
47682 : build_zero_cst (pointer_sized_int_node);
47683 tree high = ub ? build_zero_cst (pointer_sized_int_node)
47684 : build_minus_one_cst (pointer_sized_int_node);
47686 /* This function is supposed to be used to create zero and
47687 none bounds only. */
47688 gcc_assert (lb == 0 || lb == -1);
47689 gcc_assert (ub == 0 || ub == -1);
47691 return build_complex (NULL, low, high);
47694 static int
47695 ix86_initialize_bounds (tree var, tree lb, tree ub, tree *stmts)
47697 tree size_ptr = build_pointer_type (size_type_node);
47698 tree lhs, modify, var_p;
47700 ub = build1 (BIT_NOT_EXPR, size_type_node, ub);
47701 var_p = build1 (CONVERT_EXPR, size_ptr,
47702 build_fold_addr_expr (var));
47704 lhs = build1 (INDIRECT_REF, size_type_node, var_p);
47705 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, lb);
47706 append_to_statement_list (modify, stmts);
47708 lhs = build1 (INDIRECT_REF, size_type_node,
47709 build2 (POINTER_PLUS_EXPR, size_ptr, var_p,
47710 TYPE_SIZE_UNIT (size_type_node)));
47711 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, ub);
47712 append_to_statement_list (modify, stmts);
47714 return 2;
47717 /* Initialize the GCC target structure. */
47718 #undef TARGET_RETURN_IN_MEMORY
47719 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
47721 #undef TARGET_LEGITIMIZE_ADDRESS
47722 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
47724 #undef TARGET_ATTRIBUTE_TABLE
47725 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
47726 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
47727 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
47728 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
47729 # undef TARGET_MERGE_DECL_ATTRIBUTES
47730 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
47731 #endif
47733 #undef TARGET_COMP_TYPE_ATTRIBUTES
47734 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
47736 #undef TARGET_INIT_BUILTINS
47737 #define TARGET_INIT_BUILTINS ix86_init_builtins
47738 #undef TARGET_BUILTIN_DECL
47739 #define TARGET_BUILTIN_DECL ix86_builtin_decl
47740 #undef TARGET_EXPAND_BUILTIN
47741 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
47743 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
47744 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
47745 ix86_builtin_vectorized_function
47747 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
47748 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
47750 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
47751 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
47753 #undef TARGET_VECTORIZE_BUILTIN_GATHER
47754 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
47756 #undef TARGET_BUILTIN_RECIPROCAL
47757 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
47759 #undef TARGET_ASM_FUNCTION_EPILOGUE
47760 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
47762 #undef TARGET_ENCODE_SECTION_INFO
47763 #ifndef SUBTARGET_ENCODE_SECTION_INFO
47764 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
47765 #else
47766 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
47767 #endif
47769 #undef TARGET_ASM_OPEN_PAREN
47770 #define TARGET_ASM_OPEN_PAREN ""
47771 #undef TARGET_ASM_CLOSE_PAREN
47772 #define TARGET_ASM_CLOSE_PAREN ""
47774 #undef TARGET_ASM_BYTE_OP
47775 #define TARGET_ASM_BYTE_OP ASM_BYTE
47777 #undef TARGET_ASM_ALIGNED_HI_OP
47778 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
47779 #undef TARGET_ASM_ALIGNED_SI_OP
47780 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
47781 #ifdef ASM_QUAD
47782 #undef TARGET_ASM_ALIGNED_DI_OP
47783 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
47784 #endif
47786 #undef TARGET_PROFILE_BEFORE_PROLOGUE
47787 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
47789 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
47790 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
47792 #undef TARGET_ASM_UNALIGNED_HI_OP
47793 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
47794 #undef TARGET_ASM_UNALIGNED_SI_OP
47795 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
47796 #undef TARGET_ASM_UNALIGNED_DI_OP
47797 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
47799 #undef TARGET_PRINT_OPERAND
47800 #define TARGET_PRINT_OPERAND ix86_print_operand
47801 #undef TARGET_PRINT_OPERAND_ADDRESS
47802 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
47803 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
47804 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
47805 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
47806 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
47808 #undef TARGET_SCHED_INIT_GLOBAL
47809 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
47810 #undef TARGET_SCHED_ADJUST_COST
47811 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
47812 #undef TARGET_SCHED_ISSUE_RATE
47813 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
47814 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
47815 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
47816 ia32_multipass_dfa_lookahead
47817 #undef TARGET_SCHED_MACRO_FUSION_P
47818 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
47819 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
47820 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
47822 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
47823 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
47825 #undef TARGET_MEMMODEL_CHECK
47826 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
47828 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
47829 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
47831 #ifdef HAVE_AS_TLS
47832 #undef TARGET_HAVE_TLS
47833 #define TARGET_HAVE_TLS true
47834 #endif
47835 #undef TARGET_CANNOT_FORCE_CONST_MEM
47836 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
47837 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
47838 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
47840 #undef TARGET_DELEGITIMIZE_ADDRESS
47841 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
47843 #undef TARGET_MS_BITFIELD_LAYOUT_P
47844 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
47846 #if TARGET_MACHO
47847 #undef TARGET_BINDS_LOCAL_P
47848 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
47849 #endif
47850 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
47851 #undef TARGET_BINDS_LOCAL_P
47852 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
47853 #endif
47855 #undef TARGET_ASM_OUTPUT_MI_THUNK
47856 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
47857 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
47858 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
47860 #undef TARGET_ASM_FILE_START
47861 #define TARGET_ASM_FILE_START x86_file_start
47863 #undef TARGET_OPTION_OVERRIDE
47864 #define TARGET_OPTION_OVERRIDE ix86_option_override
47866 #undef TARGET_REGISTER_MOVE_COST
47867 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
47868 #undef TARGET_MEMORY_MOVE_COST
47869 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
47870 #undef TARGET_RTX_COSTS
47871 #define TARGET_RTX_COSTS ix86_rtx_costs
47872 #undef TARGET_ADDRESS_COST
47873 #define TARGET_ADDRESS_COST ix86_address_cost
47875 #undef TARGET_FIXED_CONDITION_CODE_REGS
47876 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
47877 #undef TARGET_CC_MODES_COMPATIBLE
47878 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
47880 #undef TARGET_MACHINE_DEPENDENT_REORG
47881 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
47883 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
47884 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
47886 #undef TARGET_BUILD_BUILTIN_VA_LIST
47887 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
47889 #undef TARGET_FOLD_BUILTIN
47890 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
#undef TARGET_COMPARE_VERSION_PRIORITY
#define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority

#undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
#define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
  ix86_generate_version_dispatcher_body

#undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
#define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
  ix86_get_function_versions_dispatcher
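
/* va_list handling: the i386/x86-64 ABIs use their own va_list types,
   so the generic handling is overridden with ABI-aware hooks.  */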
#undef TARGET_ENUM_VA_LIST_P
#define TARGET_ENUM_VA_LIST_P ix86_enum_va_list

#undef TARGET_FN_ABI_VA_LIST
#define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list

#undef TARGET_CANONICAL_VA_LIST_TYPE
#define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start

#undef TARGET_MD_ASM_CLOBBERS
#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
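
/* Argument passing and calling-convention hooks.  */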
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG ix86_function_arg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_UPDATE_STACK_BOUNDARY
#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
#undef TARGET_GET_DRAP_RTX
#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_STATIC_CHAIN
#define TARGET_STATIC_CHAIN ix86_static_chain
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
#undef TARGET_RETURN_POPS_ARGS
#define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
#undef TARGET_LEGITIMATE_COMBINED_INSN
#define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type

#if !TARGET_MACHO
#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
#endif

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode

#undef TARGET_MEMBER_TYPE_FORCES_BLK
#define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk

#undef TARGET_INSTANTIATE_DECLS
#define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
#undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
#define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
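
/* Vectorizer hooks: vectorization cost model, constant permutation
   support, and the preferred/available SIMD vector modes.  */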
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  ix86_builtin_vectorization_cost
#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  ix86_vectorize_vec_perm_const_ok
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  ix86_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  ix86_autovectorize_vector_sizes
#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST ix86_init_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
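
/* Per-function option support: saving, restoring, printing, and
   validating the state behind the "target" attribute and function
   versioning, plus the inlining compatibility check.  */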
#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE ix86_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT ix86_function_specific_print

#undef TARGET_OPTION_FUNCTION_VERSIONS
#define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P ix86_can_inline_p

#undef TARGET_EXPAND_TO_RTL_HOOK
#define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
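
/* Register allocation: i386 uses the LRA pass, enables register usage
   leveling, and provides its own register priority heuristic.  */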
#undef TARGET_LRA_P
#define TARGET_LRA_P hook_bool_void_true

#undef TARGET_REGISTER_PRIORITY
#define TARGET_REGISTER_PRIORITY ix86_register_priority

#undef TARGET_REGISTER_USAGE_LEVELING_P
#define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ix86_can_eliminate

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry

#undef TARGET_ASM_CODE_END
#define TARGET_ASM_CODE_END ix86_code_end

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage

#if TARGET_MACHO
#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS darwin_rename_builtins
#endif

#undef TARGET_LOOP_UNROLL_ADJUST
#define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust

#undef TARGET_SPILL_CLASS
#define TARGET_SPILL_CLASS ix86_spill_class
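
/* SIMD clone hooks, used when emitting vector variants of functions
   declared with the OpenMP "declare simd" (elemental) attribute.  */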
#undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
#define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
  ix86_simd_clone_compute_vecsize_and_simdlen

#undef TARGET_SIMD_CLONE_ADJUST
#define TARGET_SIMD_CLONE_ADJUST \
  ix86_simd_clone_adjust

#undef TARGET_SIMD_CLONE_USABLE
#define TARGET_SIMD_CLONE_USABLE \
  ix86_simd_clone_usable

#undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
#define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
  ix86_float_exceptions_rounding_supported_p
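
/* Pointer Bounds Checker (Intel MPX) hooks for loading, storing, and
   constructing pointer bounds.  */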
#undef TARGET_LOAD_BOUNDS_FOR_ARG
#define TARGET_LOAD_BOUNDS_FOR_ARG ix86_load_bounds

#undef TARGET_STORE_BOUNDS_FOR_ARG
#define TARGET_STORE_BOUNDS_FOR_ARG ix86_store_bounds

#undef TARGET_LOAD_RETURNED_BOUNDS
#define TARGET_LOAD_RETURNED_BOUNDS ix86_load_returned_bounds

#undef TARGET_STORE_RETURNED_BOUNDS
#define TARGET_STORE_RETURNED_BOUNDS ix86_store_returned_bounds

#undef TARGET_CHKP_BOUND_MODE
#define TARGET_CHKP_BOUND_MODE ix86_mpx_bound_mode

#undef TARGET_BUILTIN_CHKP_FUNCTION
#define TARGET_BUILTIN_CHKP_FUNCTION ix86_builtin_mpx_function

#undef TARGET_FN_ABI_VA_LIST_BOUNDS_SIZE
#define TARGET_FN_ABI_VA_LIST_BOUNDS_SIZE ix86_fn_abi_va_list_bounds_size

#undef TARGET_CHKP_FUNCTION_VALUE_BOUNDS
#define TARGET_CHKP_FUNCTION_VALUE_BOUNDS ix86_function_value_bounds

#undef TARGET_CHKP_MAKE_BOUNDS_CONSTANT
#define TARGET_CHKP_MAKE_BOUNDS_CONSTANT ix86_make_bounds_constant

#undef TARGET_CHKP_INITIALIZE_BOUNDS
#define TARGET_CHKP_INITIALIZE_BOUNDS ix86_initialize_bounds

#undef TARGET_SETUP_INCOMING_VARARG_BOUNDS
#define TARGET_SETUP_INCOMING_VARARG_BOUNDS ix86_setup_incoming_vararg_bounds
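
/* Instantiate the target hook vector: TARGET_INITIALIZER (from
   target-def.h) expands to an initializer built from the TARGET_*
   macros defined above, with defaults for every hook left untouched.  */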
struct gcc_target targetm = TARGET_INITIALIZER;
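
/* Include the garbage-collector root tables generated for this file
   by gengtype.  */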
#include "gt-i386.h"